Preliminary analyses for microeukaryote tag-sequence survey.
Description: Investigate the diversity of single-celled microbial eukaryotic communities across several deep-sea hydrothermal vent sites (including NE Pacific, Caribbean). We plan to address questions related to the environmental factors that shape protistan community dynamics, and determine if patterns in species diversity and distribution vary at different deep-sea habitats. These questions will be addressed using similarly generated metabarcoding data from several distinct hydrothermal vents. Along with characterizing community structure, we plan to evaluate interactions between protist species (to identify putative predator-prey or parasite-host relationships) and their environment (to explore their relationship to geochemical properties).
Questions to address
What is the general biogeography and distribution of the deep-sea hydrothermal vent microbial eukaryotic community?
What community structure features (i.e., species richness, proportion cosmopolitan versus endemic, species evenness) are shared across or unique to deep-sea hydrothermal vent sites?
What environmental features (i.e., temperature, geochemistry) influence microbial eukaryotic community diversity? Can we identify if certain environmental factors select for putative vent endemics?
Three sites in separate ocean regions.
metadata <- read.delim("data-input/samplelist-metadata.txt", na.strings = "")
# View(metadata)
# ?read.delim()
colnames(metadata)## [1] "ref_num" "SAMPLE" "VENT"
## [4] "COORDINATES" "SITE" "Sample_or_Control"
## [7] "SAMPLEID" "DEPTH" "SAMPLETYPE"
## [10] "Julies.Notes" "YEAR" "TEMP..C."
## [13] "pH" "PercSeawater" "Mg.mmol.kg.or.mM"
## [16] "H2.µM.or.µmol.L" "H2S..mmol.L..or.mM" "CH4..µmol.kg."
## [19] "ProkConc" "Sample_actual" "Type"
Filter to environmental data only.
# Long-format table of in situ environmental measurements.
# Keeps field samples only (no controls, incubations, or microcolonizers),
# converts every column to character, shortens the measurement column names,
# and stacks TEMP..ProkConc into VARIABLE / VALUE pairs.
env_deepsea <- metadata %>%
  mutate(across(everything(), as.character)) %>%
  filter(
    Sample_or_Control == "Sample",
    SAMPLETYPE != "Incubation",
    SAMPLETYPE != "Microcolonizer"
  ) %>%
  select(
    VENT, COORDINATES, SITE, SAMPLEID, DEPTH, SAMPLETYPE, YEAR,
    TEMP = starts_with("TEMP"),
    pH,
    PercSeawater,
    Mg = starts_with("Mg"),
    H2 = starts_with("H2."),
    H2S = starts_with("H2S"),
    CH4 = starts_with("CH4"),
    ProkConc
  ) %>%
  pivot_longer(
    cols = TEMP:ProkConc,
    names_to = "VARIABLE",
    values_to = "VALUE",
    values_drop_na = FALSE
  ) %>%
  # Replicates share identical measurements; keep one row per unique record.
  distinct() %>%
  group_by(VENT, COORDINATES, SITE, DEPTH, SAMPLETYPE, YEAR, VARIABLE)
# head(env_deepsea)
# unique(env_deepsea$VARIABLE)
Units for the variables are as follows: - Temp = Celsius - Percent Seawater = % - Mg = mmol/kg (or mM) - H2 = µmol/L (or µM) - H2S = mmol/L (or mM) - CH4 = µmol/kg (or µM) - ProkConc = cells/ml
last updated Dec 29, 2021 * Needs updated H2 numbers for MCR from Jeff * All of this chemistry needs to be double checked again…
# Display order and aesthetics shared by every geochemistry figure.
# Defined before first use so the plots below reference one source of truth.
sampletype_order <- c("Background", "Plume", "Vent")
sampletype_symbol <- c(21, 23, 24)
site_order <- c("Axial", "GordaRidge", "Piccard", "VonDamm")
site_color <- c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")

# Placeholder strings that mark a value as missing / not detected.
# NOTE(review): this name shadows base::rm(); kept because later chunks may
# refer to it.
rm <- c("-", "", "nd", "bd", NA)

# Drop placeholder entries and convert the remaining VALUE strings to numbers.
#   df            long-format table with a VALUE column (e.g. env_deepsea)
#   missing_codes strings treated as "no measurement"
geochem_numeric <- function(df, missing_codes = rm) {
  df %>%
    filter(!(VALUE %in% missing_codes)) %>%
    mutate(VALUE = as.numeric(as.character(VALUE)))
}

# Layers common to all three figures. Shapes and fills are matched to sample
# types / sites BY NAME, so a level that is absent from one subset cannot
# shift the remaining colours or symbols.
geochem_layers <- list(
  facet_wrap(VARIABLE ~ ., scales = "free"),
  scale_shape_manual(values = setNames(sampletype_symbol, sampletype_order)),
  scale_fill_manual(values = setNames(site_color, site_order)),
  guides(
    fill = guide_legend(override.aes = list(shape = 21)),
    shape = guide_legend(override.aes = list(fill = "black"))
  ),
  theme_bw(),
  theme(
    axis.text = element_text(color = "black", size = 12),
    legend.title = element_blank(),
    axis.title = element_text(color = "black", size = 14),
    legend.text = element_text(color = "black", size = 14),
    plot.margin = margin(2, 1, 2, 1, "cm"),
    strip.background = element_blank()
  ),
  labs(x = "", y = "")
)

# All chemistry variables (everything except prokaryote counts), points only.
geochem_1 <- env_deepsea %>%
  filter(VARIABLE != "ProkConc") %>%
  geochem_numeric() %>%
  ggplot(aes(x = SAMPLETYPE, y = VALUE, fill = SITE, shape = SAMPLETYPE)) +
  geom_jitter(size = 3) +
  geochem_layers
geochem_1

# Same variables, with a grey box plot per sample type behind the points.
geochem_violin <- env_deepsea %>%
  filter(VARIABLE != "ProkConc") %>%
  geochem_numeric() %>%
  ggplot(aes(x = SAMPLETYPE, y = VALUE, fill = SITE, shape = SAMPLETYPE)) +
  geom_boxplot(alpha = 0.3, aes(group = SAMPLETYPE), fill = "grey", width = 0.3) +
  geom_jitter(size = 2, width = 0.2) +
  geochem_layers
geochem_violin

# Prokaryote cell concentration only, on a log10 y axis.
geochem_2 <- env_deepsea %>%
  filter(VARIABLE == "ProkConc") %>%
  geochem_numeric() %>%
  ggplot(aes(x = SAMPLETYPE, y = VALUE, fill = SITE, shape = SAMPLETYPE)) +
  geom_boxplot(alpha = 0.3, aes(group = SAMPLETYPE), fill = "grey", width = 0.3) +
  geom_jitter(size = 2, width = 0.2) +
  scale_y_log10() +
  geochem_layers
geochem_2
Generate output table for all parameters measured
# colnames(metadata)
# Wide table of every measured parameter, one row per field sample
# (controls, incubations, and microcolonizers excluded; all columns character).
geomchem_table <- metadata %>%
  mutate(across(everything(), as.character)) %>%
  filter(
    Sample_or_Control == "Sample",
    SAMPLETYPE != "Incubation",
    SAMPLETYPE != "Microcolonizer"
  ) %>%
  select(
    SAMPLE, VENT, COORDINATES, SITE, SAMPLEID, DEPTH, SAMPLETYPE, YEAR,
    TEMP = starts_with("TEMP"),
    pH,
    PercSeawater,
    Mg = starts_with("Mg"),
    H2 = starts_with("H2."),
    H2S = starts_with("H2S"),
    CH4 = starts_with("CH4"),
    ProkConc
  )
geomchem_table## SAMPLE VENT
## 1 Axial_BSW1500m_BSW1500m_2015 Deep seawater
## 2 Axial_AnemonePlume_AnemonePlume_2015 Anemone Plume
## 3 Axial_Anemone_FS891_2013 Anemone
## 4 Axial_Boca_FS905_2013 Boca
## 5 Axial_Dependable_FS900_2013 Dependable
## 6 Axial_ElGuapo_FS896_2013 El Guapo
## 7 Axial_Marker113_FS903_2013 Marker113
## 8 Axial_Marker113_FS906_2014 Marker113
## 9 Axial_Marker113_FS915_2015 Marker113
## 10 Axial_Marker33_FS904_2013 Marker33
## 11 Axial_Marker33_FS908_2014 Marker33
## 12 Axial_N3Area_FS898_2013 N3Area
## 13 Axial_Skadi_FS902_2013 Skadi
## 14 Axial_Skadi_FS910_2014 Skadi
## 15 GordaRidge_BSW020_sterivex_2019_REPa Deep seawater
## 16 GordaRidge_BSW056_sterivex_2019_REPb Deep seawater
## 17 GordaRidge_BSW081_sterivex_2019 Shallow seawater
## 18 GordaRidge_Plume001_sterivex_2019_REPa Near vent BW
## 19 GordaRidge_Plume001_sterivex_2019_REPb Near vent BW
## 20 GordaRidge_Plume036_sterivex_2019_REPb Candelabra Plume
## 21 GordaRidge_Plume096_sterivex_2019 Mt Edwards Plume
## 22 GordaRidge_Vent009_SUPRS1_2019 Mt Edwards
## 23 GordaRidge_Vent010_SUPRS2_2019 Mt Edwards
## 24 GordaRidge_Vent011_SUPRS3_2019 Mt Edwards
## 25 GordaRidge_Vent039_SUPRS1_2019 Venti Latte
## 26 GordaRidge_Vent040_SUPRS2_2019 Venti Latte
## 27 GordaRidge_Vent041_SUPRS3_2019 Venti Latte
## 28 GordaRidge_Vent086_SUPRS1_2019 Candelabra
## 29 GordaRidge_Vent087_SUPRS2_2019 Candelabra
## 30 GordaRidge_Vent088_SUPRS3_2019 Candelabra
## 31 GordaRidge_Vent105_SUPRS9_2019 Sir Ventsalot
## 32 GordaRidge_Vent106_SUPRS10_2019 Sir Ventsalot
## 33 GordaRidge_Vent107_SUPRS11_2019 Sir Ventsalot
## 34 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 BSW
## 35 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 Plume
## 36 68_MCR_Piccard_HOG_LotsOShrimp_J21242HOG14_0_Jun2021 LotsOShrimp
## 37 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 Shrimpocalypse
## 38 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 LotsOShrimp
## 39 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 BSW
## 40 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 Plume
## 41 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 ArrowLoop
## 42 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 WhiteCastle
## 43 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 MustardStand
## 44 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 Rav2
## 45 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 OldManTree
## 46 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 ShrimpHole
## 47 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 X18
## 48 79_MCR_VonDamm_HOG_Bartizan_J21244HOG12_5_Jun2021 Bartizan
## 49 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 Rav2
## COORDINATES SITE SAMPLEID DEPTH SAMPLETYPE YEAR
## 1 46.27389 N 129.79548 W Axial BSW1500m 1520 Background 2015
## 2 45.9335667 N 130.013667 W Axial AnemonePlume <NA> Plume 2015
## 3 45.9332 N 130.0137 W Axial FS891 1542 Vent 2013
## 4 45.927692 N 129.982482 W Axial FS905 <NA> Vent 2013
## 5 45.87992 N 129.80294 W Axial FS900 <NA> Vent 2013
## 6 45.926575 N 129.979479 W Axial FS896 <NA> Vent 2013
## 7 45.9227 N 129.9882 W Axial FS903 1520 Vent 2013
## 8 45.9227 N 129.9882 W Axial FS906 1518 Vent 2014
## 9 45.9227 N 129.9882 W Axial FS915 1520 Vent 2015
## 10 45.9332 N 129.9822 W Axial FS904 1516 Vent 2013
## 11 45.9332 N 129.9822 W Axial FS908 1514 Vent 2014
## 12 45.943716 N 129.985163 W Axial FS898 <NA> Vent 2013
## 13 45.923383 N 129.982853 W Axial FS902 <NA> Vent 2013
## 14 45.923383 N 129.982853 W Axial FS910 <NA> Vent 2014
## 15 42.7495125 N 126.710294 W GordaRidge BSW020 2010 Background 2019
## 16 42.76060928 N 126.7047891 W GordaRidge BSW056 2010 Background 2019
## 17 42.7546 N 126.743 W GordaRidge BSW081 150 Background 2019
## 18 42.75500527 N 126.709891 W GordaRidge Plume001 2745 Background 2019
## 19 42.75500527 N 126.709891 W GordaRidge Plume001 2745 Background 2019
## 20 42.7551105 N 126.709442 W GordaRidge Plume036 2725 Plume 2019
## 21 42.75465646 N 126.7091669 W GordaRidge Plume096 2707 Plume 2019
## 22 42.75464576 N 126.7090451 W GordaRidge Vent009 2707 Vent 2019
## 23 42.754692 N 126.7090115 W GordaRidge Vent010 2707 Vent 2019
## 24 42.754692 N 126.7090115 W GordaRidge Vent011 2707 Vent 2019
## 25 42.7548145 N 126.7088945 W GordaRidge Vent039 2708 Vent 2019
## 26 42.754858 N 126.708922 W GordaRidge Vent040 2708 Vent 2019
## 27 42.754858 N 126.708922 W GordaRidge Vent041 2708 Vent 2019
## 28 42.75506794 N 126.709613 W GordaRidge Vent086 2730 Vent 2019
## 29 42.75503414 N 126.7094585 W GordaRidge Vent087 2730 Vent 2019
## 30 42.75503414 N 126.7094585 W GordaRidge Vent088 2730 Vent 2019
## 31 42.761202 N 126.7054775 W GordaRidge Vent105 2732 Vent 2019
## 32 42.76131802 N 126.7054541 W GordaRidge Vent106 2732 Vent 2019
## 33 42.76131802 N 126.7054541 W GordaRidge Vent107 2732 Vent 2019
## 34 18.547980, -81.718180 Piccard <NA> 4776 Background 2020
## 35 18.546767, -81.718200 Piccard <NA> 4944 Plume 2020
## 36 18.546789, -81.718356 Piccard <NA> 4967 Vent 2020
## 37 18.546674, -81.717806 Piccard <NA> 4945 Vent 2020
## 38 18.546789, -81.718356 Piccard <NA> 4967 Vent 2020
## 39 18.374183, -81.781533 VonDamm <NA> 2400 Background 2020
## 40 18.377600, -81.799317 VonDamm <NA> 1979 Plume 2020
## 41 18.376659, -81.797986 VonDamm <NA> 2309 Vent 2020
## 42 18.377005, -81.798088 VonDamm <NA> 2307 Vent 2020
## 43 18.375130, -81.797488 VonDamm <NA> 2374 Vent 2020
## 44 18.375112, -81.797180 VonDamm <NA> 2389.6 Vent 2020
## 45 18.375069, -81.797678 VonDamm <NA> 2375.8 Vent 2020
## 46 18.374893, -81.797441 VonDamm <NA> 2376 Vent 2020
## 47 18.374810, -81.797411 VonDamm <NA> 2377 Vent 2020
## 48 18.798096, -81.377907 VonDamm <NA> 2307 Vent 2020
## 49 18.375254, -81.797176 VonDamm <NA> 2388.9 Vent 2020
## TEMP pH PercSeawater Mg H2 H2S CH4 ProkConc
## 1 2 7.8 100 52.4 0.002 0 0.002 2.50E+04
## 2 <NA> <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 3 28.2 5.5 88.9 46.5 13.9 1.0604496 14.8 4.10E+05
## 4 6.8 <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 5 53.2 <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 6 26.1 <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 7 24.5 6.2 96 50.2 1.4 0.7462044 16.9 4.60E+05
## 8 24.3 5.8 96.4 48.8 1 0.5691216 38 6.80E+05
## 9 25.4 6.6 95.8 50.2 0.3 0.59184552 22.3 1.50E+06
## 10 27.3 5.5 87.7 45.9 1.5 0.557862 19 4.20E+05
## 11 18.5 5.6 91.9 48.1 1.5 0.2676714 6.4 3.90E+05
## 12 20.1 <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 13 36 <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 14 <NA> <NA> <NA> <NA> <NA> 0 <NA> <NA>
## 15 1.8 7.8 100 51.8 - - <NA> 3.91E+04
## 16 1.8 7.8 100 51.8 - - <NA> 3.91E+04
## 17 8.6 <NA> 100 51.8 - - <NA> <NA>
## 18 1.7 7.8 100 51.8 - - <NA> 51959.11
## 19 1.7 7.8 100 51.8 - - <NA> 51959.11
## 20 1.7 <NA> <NA> <NA> - - <NA> 7.69E+04
## 21 1.8 <NA> <NA> <NA> - - <NA> <NA>
## 22 40 6 83 42.6 127 1.01 9.8671356 51439.52
## 23 40 6 83 42.6 127 1.01 9.8671356 51439.52
## 24 40 6 83 42.6 127 1.01 9.8671356 51439.52
## 25 11 6.4 97 50.9 bd nd 0.879249707 111192.5
## 26 11 6.4 97 50.9 bd nd 0.879249707 111192.5
## 27 11 6.4 97 50.9 bd nd 0.879249707 111192.5
## 28 79 5.5 88 35.7 21.9 nd 23.15357562 55076.66
## 29 79 5.5 88 35.7 21.9 nd 23.15357562 55076.66
## 30 79 5.5 88 35.7 21.9 nd 23.15357562 55076.66
## 31 72 <NA> 98 50.8 - - - 52998.29
## 32 72 <NA> 98 50.8 - - - 52998.29
## 33 72 <NA> 98 50.8 - - - 52998.29
## 34 4.46 - 100 52.5 - - <NA> 11860.187
## 35 4.46 - - - - - <NA> 51429.13
## 36 19 6.32 97.185 51.05 11350 - 11.4 53878.136
## 37 85 5.9675 81.6 42.87 0 - 21.4 238585.68
## 38 36 5.92 97.185 51.05 11350 - 11.4 53878.136
## 39 4.181 - - - - - <NA> 34705.9161
## 40 4.208 - - - - - <NA> 16478.313
## 41 137 5.845 43.95 23.085 9550 1.655 1585 10369.792
## 42 108 5.635 27.04 14.205 7100 1.675 2200 <NA>
## 43 108 5.85667 41.915 22.02 8750 1.675 1624 56677
## 44 94 5.77 29.63 15.565 5000 1.545 1850 <NA>
## 45 121.6 5.82 28.605 15.025 5650 1.685 1940 <NA>
## 46 21 7.72 96.36 50.62 5400 - 213 <NA>
## 47 48 6.97666667 58.885 30.93 1740 2.11 1280 111429.781
## 48 129 5.72 35.235 18.51 4600 1.655 1580 16163.441
## 49 98.2 5.7825 29.63 15.565 5000 1.545 1850 <NA>
# write_delim(geomchem_table, file = "table-geochem-params.txt", delim = "\t")Datasets included: - Gorda Ridge 2019 cruise - Axial Seamount time series - 2013, 2014, & 2015 - Mid-Cayman Rise 2020 cruise
All data generated from extracted RNA, reverse transcribed to cDNA and amplified with primers that target the V4 hypervariable region on the 18S rRNA gene.
Analysis done with QIIME2, kept 40-60% of the sequences through the QC process and generated Amplicon Sequence Variants (ASVs) with DADA2. Taxonomic assignment done with vsearch using the PR2 database (v4.14) at 80% identity. See the seq-analysis directory for QIIME2 code.
After determining ASVs for each sequence run, ASV tables were merged.
merged_tax <- read_delim("data-input/taxonomy.tsv", delim = "\t")
merged_asv <- read_delim("data-input/microeuk-merged-asv-table.tsv", delim = "\t", skip = 1)
# head(merged_tax)Still want to find more metadata. As of Dec 30, got as much geochem data available, but can add in more metadata for the MCR cruise. Need to confirm values from Jeff
# Sample-level metadata used to annotate the ASV table: field samples only,
# all columns as character, measurement columns renamed to short labels.
metadata_formatted <- metadata %>%
  mutate(across(everything(), as.character)) %>%
  filter(
    Sample_or_Control == "Sample",
    SAMPLETYPE != "Incubation",
    SAMPLETYPE != "Microcolonizer"
  ) %>%
  select(
    SAMPLE, VENT, COORDINATES, SITE, SAMPLEID, DEPTH, SAMPLETYPE, YEAR,
    TEMP = starts_with("TEMP"),
    pH,
    PercSeawater,
    Mg = starts_with("Mg"),
    H2 = starts_with("H2."),
    H2S = starts_with("H2S"),
    CH4 = starts_with("CH4"),
    ProkConc,
    Sample_or_Control
  )
Remove samples from Gorda Ridge microcolonizers and from the FLP experiments (Gorda Ridge and Mid-Cayman Rise).
# Long-format ASV table annotated with taxonomy and sample metadata.
# One row per ASV (FeatureID) x sample, with the sequence count in `value`.
asv_wtax <- merged_asv %>%
  select(FeatureID = '#OTU ID', everything()) %>%
  pivot_longer(cols = !FeatureID,
               names_to = "SAMPLE", values_to = "value") %>%
  left_join(merged_tax, by = c("FeatureID" = "Feature ID")) %>%
  # Join key stated explicitly so an accidental shared column name can never
  # silently change how samples are matched to metadata.
  left_join(metadata_formatted, by = "SAMPLE") %>%
  filter(!grepl("Siders_", SAMPLE)) %>%
  # NOTE(review): `!=` evaluates to NA for rows with no metadata match, and
  # filter() drops NA rows. metadata_formatted only holds field samples, so
  # these two filters also remove every control/blank column -- confirm that
  # this is intended, given the "Control or blank" label assigned below.
  filter(SAMPLETYPE != "Incubation") %>%
  filter(SAMPLETYPE != "Microcolonizer") %>%
  # Label each sample by the dataset its name indicates.
  mutate(DATASET = case_when(
    grepl("_GR_", SAMPLE) ~ "GR",
    grepl("Gorda", SAMPLE) ~ "GR",
    grepl("_MCR_", SAMPLE) ~ "MCR",
    grepl("Axial", SAMPLE) ~ "Axial",
    TRUE ~ "Control or blank")) %>%
  # Split the semicolon-delimited taxonomy string into one column per rank.
  separate(Taxon, c("Domain", "Supergroup",
                    "Phylum", "Class", "Order",
                    "Family", "Genus", "Species"), sep = ";", remove = FALSE) %>%
  # Human-readable sample label used on plot axes.
  unite(SAMPLENAME, SITE, SAMPLETYPE, YEAR, VENT, SAMPLEID, sep = " ", remove = FALSE)
# View(asv_wtax)
# head(asv_wtax) ## Complete ASV table with full taxonomy names and annotated sample informationBarplots to show total number of sequences and total number of ASVs.
Total number of sequences and ASVs parallel each other. The Axial and Gorda Ridge data were run on the same sequence run, with Mid-Cayman Rise run on a separate MiSeq run - so the average number of sequences (and ASVs) varies between these two runs. A few samples have too few sequences, they will be removed below.
This newest version of PR2 has bacteria and archaea in it. Very, very few were assigned to this. Majority assigned to eukaryotes.
# head(asv_wtax)
library(viridis)
# Pre-QC overview, two panels side by side, faceted by dataset:
#   left  = number of ASVs detected in each sample
#   right = number of sequences in each sample, coloured by assigned domain
plot_grid(
  # Total number of ASVs
  asv_wtax %>%
    filter(value > 0) %>%
    filter(Sample_or_Control == "Sample") %>%
    ggplot(aes(x = SAMPLENAME)) +
    geom_bar(stat = "count", width = 0.9) +
    labs(y = "Total ASVs per sample", x = "") +
    coord_flip() +
    scale_y_continuous(position = "right") +
    theme_linedraw() +
    # `scales` spelled out in full (was the partially matched `scale`).
    facet_grid(DATASET ~ ., scales = "free", space = "free") +
    theme(axis.text.x = element_text(angle = 0, hjust = 1, vjust = 1),
          axis.text.y = element_text(angle = 0, hjust = 1, vjust = 1),
          strip.background = element_blank(),
          strip.text = element_text(color = "black")),
  # Total number of sequences
  asv_wtax %>%
    filter(Sample_or_Control == "Sample") %>%
    group_by(SAMPLENAME, SITE, Domain, DATASET) %>%
    summarise(SUM_SEQ_DOMAIN = sum(value), .groups = "drop") %>%
    ggplot(aes(x = SAMPLENAME, y = SUM_SEQ_DOMAIN, fill = Domain)) +
    geom_bar(stat = "identity", color = "black", width = 0.9) +
    labs(y = "Total sequences per sample", x = "") +
    coord_flip() +
    viridis::scale_fill_viridis(discrete = TRUE) +
    scale_y_continuous(position = "right") +
    theme_linedraw() +
    facet_grid(DATASET ~ ., scales = "free", space = "free") +
    theme(axis.text.x = element_text(angle = 0, hjust = 1, vjust = 1),
          axis.text.y = element_text(angle = 0, hjust = 1, vjust = 1),
          strip.background = element_blank(),
          strip.text = element_text(color = "black"),
          legend.position = "right"),
  ncol = 2, align = c("hv"), axis = c("lr"))

# Per-sample sequence totals and ASV counts before QC, rendered as a gt table
# grouped by dataset and site, then saved as HTML.
table_raw_stats <- asv_wtax %>%
  filter(value > 0) %>%
  group_by(SAMPLENAME, DATASET, SITE) %>%
  summarise(SEQ_SUM = sum(value),
            ASV_COUNT = n(),
            .groups = "drop") %>%
  gt(
    groupname_col = c("DATASET", "SITE"),
    rowname_col = "SAMPLENAME"
  )
table_raw_stats
gtsave(table_raw_stats, filename = "seq_asv_count_nonQC.html", path = "output-tables/")
After removing contaminant ASVs below, I will set a threshold of 20,000 sequences - if a sample has fewer than this, chuck it.
Import sample description text file, import as phyloseq library, and remove potential contaminate ASVs and sequences. Catalog total number of ASVs and sequences removed from analysis.
# library(decontam); library(phyloseq)

# Taxonomy matrix: one row per ASV (row name = FeatureID), with the full
# Taxon string plus one column per taxonomic rank.
tax_matrix <- merged_tax %>%
  select(FeatureID = `Feature ID`, Taxon) %>%
  separate(Taxon, c("Domain", "Supergroup",
                    "Phylum", "Class", "Order",
                    "Family", "Genus", "Species"), sep = ";", remove = FALSE) %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix()

# Count matrix: ASVs in rows (row name = FeatureID), samples in columns.
asv_matrix <- merged_asv %>%
  select(FeatureID = '#OTU ID', everything()) %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix()

# Align row names for each matrix.
# Reorder the taxonomy rows to follow the ASV table by FeatureID. Simply
# overwriting the row names would attach the wrong taxonomy to an ASV
# whenever the two input files list features in a different order.
tax_rows <- match(rownames(asv_matrix), rownames(tax_matrix))
if (anyNA(tax_rows)) {
  stop("Some ASVs in the count table have no entry in the taxonomy table.",
       call. = FALSE)
}
tax_matrix <- tax_matrix[tax_rows, , drop = FALSE]

# Set rownames of metadata table to SAMPLE information
row.names(metadata) <- metadata$SAMPLE
# Import asv and tax matrices
ASV = otu_table(asv_matrix, taxa_are_rows = TRUE)
TAX = tax_table(tax_matrix)
phylo_obj <- phyloseq(ASV, TAX)
# Import metadata as sample data in phyloseq
samplenames <- sample_data(metadata)
# join as phyloseq object
physeq_wnames = merge_phyloseq(phylo_obj, samplenames)
# colnames(ASV)
## Check
# physeq_wnames# When "Control" appears in "Sample_or_Control column, this is a negative control"
sample_data(physeq_wnames)$is.neg <- sample_data(physeq_wnames)$Sample_or_Control == "Control"# ID contaminants using Prevalence information
contam_prev <- isContaminant(physeq_wnames,
method="prevalence",
neg="is.neg",
threshold = 0.5, normalize = TRUE)
# Report number of ASVs IDed as contaminants
table(contam_prev$contaminant)0.5 - this threshold will ID contaminants in all samples that are more prevalent in negative controls than in positive samples.
As of Dec 30 2021: 56 ASVs deemed to be contaminant and will be removed.
# Subset contaminant ASVs
contams <- filter(contam_prev, contaminant == "TRUE")
list_of_contam_asvs <- as.character(row.names(contams))
# length(list_of_contam_asvs)
taxa_contam <- as.data.frame(tax_matrix) %>%
rownames_to_column(var = "FeatureID") %>%
filter(FeatureID %in% list_of_contam_asvs)
# head(taxa_contam)# View(asv_wtax)
asv_wtax_decon <- asv_wtax %>%
filter(!(FeatureID %in% list_of_contam_asvs)) %>%
filter(!(Sample_or_Control == "Control"))
tmp_orig <- (asv_wtax %>% filter(!(Sample_or_Control == "Control")))
# Stats on lost
x <- length(unique(tmp_orig$FeatureID)); x
y <- length(unique(asv_wtax_decon$FeatureID)); y
y-x
100*((y-x)/x) # 56 total ASVs lost
a <- sum(tmp_orig$value);a #3.817 million
b <- sum(asv_wtax_decon$value);b #3.799 million
100*((b-a)/a)
# Lost 0.47% of sequences from whole dataset.
## Subsample to clean ASVs
asv_wtax_wstats <- asv_wtax %>%
mutate(DECONTAM = case_when(
FeatureID %in% list_of_contam_asvs ~ "FAIL",
TRUE ~ "PASS"
))Started with 17934 ASVs, post-decontamination, we have 17878 (a loss of 56 ASVs).
Data started with 3817219 sequences, after removing 56 ASVs, we have 3788791 total sequences. There was a total loss of 0.74% of sequences.
plot_grid(asv_wtax_wstats %>%
filter(value > 0) %>%
ggplot(aes(x = SAMPLE, fill = DECONTAM)) +
geom_bar(stat = "count", width = 0.9, color = "black") +
labs(y = "Total ASVs") +
coord_flip() +
theme_linedraw() +
facet_grid(DATASET ~ ., scale = "free", space = "free") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black"),
legend.position = "bottom"),
asv_wtax_wstats %>%
group_by(SAMPLE, SITE, DECONTAM, DATASET) %>%
summarise(SUM_SEQ_DOMAIN = sum(value)) %>%
ggplot(aes(x = SAMPLE, y = SUM_SEQ_DOMAIN, fill = DECONTAM)) +
geom_bar(stat = "identity", color = "black", width = 0.9) +
labs(y = "Total Sequences") +
coord_flip() +
theme_linedraw() +
facet_grid(DATASET ~ ., scale = "free", space = "free") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black"),
legend.position = "bottom"),
ncol = 2)This plot shows the distribution of ASVs and sequences that failed or passed the decontamination step. Most obvious are the control samples that indicated the potentially contaminate ASVs.
# colnames(asv_wtax_wstats)
# unique(asv_wtax_wstats$SAMPLE)
sites <- c("Piccard", "VonDamm", "Axial", "GordaRidge")
asv_insitu <- asv_wtax_wstats %>% filter(Sample_or_Control != "Control") %>%
filter(SITE %in% sites) %>%
filter(!grepl("_expTf_", SAMPLE)) %>%
filter(value > 0) %>%
filter(DECONTAM == "PASS")
# Get quick stats on totals
sum(asv_insitu$value) # 3.8 million sequences
length(unique(asv_insitu$FeatureID)) #12,378 ASVsFinal in situ dataset includes 3.79 million sequences and 12,378 ASVs total.
Additional sample QC, check replicates, and determine if replicates should be averaged.
plot_grid(asv_insitu %>%
group_by(SAMPLENAME, VENT, DATASET, Domain) %>%
summarise(seqsum_var = sum(value),
asvcount_var = n()) %>%
pivot_longer(ends_with("_var"), names_to = "VARIABLE") %>%
ggplot(aes(x = SAMPLENAME, y = value, fill = Domain)) +
geom_bar(color = "black", stat = "identity", position = "fill") +
facet_grid(VARIABLE ~ DATASET, space = "free", scales = "free") +
scale_y_continuous(expand = c(0,0)) +
theme_linedraw() +
scale_fill_brewer(palette = "Paired") +
theme(strip.background = element_blank(), strip.text = element_text(color = "black"),
axis.text.x = element_text(color = "black", angle = 90, hjust = 1, vjust = 0.5),
legend.position = "bottom"),
asv_insitu %>%
group_by(SAMPLENAME, VENT, DATASET, Domain) %>%
summarise(seqsum_var = sum(value),
asvcount_var = n()) %>%
pivot_longer(ends_with("_var"), names_to = "VARIABLE") %>%
ggplot(aes(x = SAMPLENAME, y = value, fill = Domain)) +
geom_bar(color = "black", stat = "identity", position = "stack") +
facet_grid(VARIABLE ~ DATASET, space = "free_x", scales = "free") +
scale_y_continuous(expand = c(0,0)) +
theme_linedraw() +
scale_fill_brewer(palette = "Paired") +
theme(strip.background = element_blank(), strip.text = element_text(color = "black"),
axis.text.x = element_text(color = "black", angle = 90, hjust = 1, vjust = 0.5),
legend.position = "bottom"),
ncol = 2)asv_insitu %>%
filter(Domain == "Eukaryota") %>%
# unite(SampleIdentifier, VENT, SAMPLETYPE, sep = " ", remove = FALSE) %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>%
summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
group_by(SITE, SAMPLETYPE, VENT, Supergroup) %>%
summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
ggplot(aes(x = VENT, y = SEQ_SUM, fill = Supergroup)) +
geom_bar(stat = "identity", position = "stack", color = "black", width = 0.9) +
facet_grid(. ~ SITE + SAMPLETYPE, scale = "free", space = "free") +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black")) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
"#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
"#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
"#807dba", "#54278f", "#bdbdbd", "black"))Repeat taxonomy barplot, but with relative abundance
asv_insitu %>%
filter(Domain == "Eukaryota") %>%
# unite(SampleIdentifier, VENT, SAMPLETYPE, sep = " ", remove = FALSE) %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>%
summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
group_by(SITE, SAMPLETYPE, VENT, Supergroup) %>%
summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
ggplot(aes(x = VENT, y = SEQ_SUM, fill = Supergroup)) +
geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
facet_grid(. ~ SITE + SAMPLETYPE, scale = "free", space = "free") +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black")) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
"#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
"#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
"#807dba", "#54278f", "#bdbdbd", "black"))Filter samples so that the total number of sequences is greater than 20,000 sequences.
# head(asv_insitu)
# unique(asv_insitu$Sample_or_Control)
# hist(asv_insitu$value)
# Samples whose total sequence count falls below 20,000; these are dropped
# from the QC'd table built next.
tmp <- asv_insitu %>%
  group_by(SAMPLE, SAMPLENAME) %>%
  summarise(SUM = sum(value)) %>%
  filter(SUM < 20000)
toofew <- as.character(unique(tmp[["SAMPLE"]]))
toofew
Samples: Axial_Dependable_FS900_2013 and GordaRidge_BSW020_sterivex_2019_REPa removed due to too few sequences.
Final table reporting total sequences and ASVs for each sample.
asv_insitu_qc <- asv_insitu %>%
filter(!(SAMPLE %in% toofew)) %>%
filter(value > 0)
stats_seq_asv_postQC <- asv_insitu_qc %>%
group_by(SAMPLEID, VENT, DATASET, SITE, SAMPLETYPE, YEAR) %>%
summarise(SEQ_SUM = sum(value),
ASV_COUNT = n()) %>%
ungroup() %>%
gt(
groupname_col = c("DATASET", "SITE", "YEAR"),
rowname_col = "SAMPLEID"
) %>%
tab_header(title = "Final sequence & ASV count")
stats_seq_asv_postQC
# sum(asv_insitu_qc$value)
# length(unique(asv_insitu_qc$FeatureID))
gtsave(stats_seq_asv_postQC, filename = "output-tables/seq_asv_count_postQC.html")
# tmp <- asv_insitu_qc %>%
# group_by(SAMPLEID, VENT, DATASET, SITE, SAMPLETYPE, YEAR) %>%
# summarise(SEQ_SUM = sum(value),
# ASV_COUNT = n())
# mean(tmp$SEQ_SUM); range(tmp$SEQ_SUM)
# mean(tmp$ASV_COUNT); range(tmp$ASV_COUNT)
# View(filter(tmp))Set up analysis to classify each ASV based on distribution
# head(asv_insitu_qc)
# head(insitu_wide)
# unique(asv_insitu_qc$SAMPLETYPE)
# unique(asv_insitu_qc$SITE)
tax_asv_id <- asv_insitu_qc %>%
filter(value > 0) %>% #remove zero values
select(FeatureID, SITE, SAMPLETYPE) %>% # isolate only ASVs that are PRESENT at a site and sampletype
distinct() %>% #unique this, as presense = present in at least 1 rep (where applicable)
unite(sample_id, SITE, SAMPLETYPE, sep = "_") %>%
# select(-SITE) %>%
# distinct() %>%
add_column(present = 1) %>%
pivot_wider(names_from = sample_id, values_from = present, values_fill = 0) %>%
rowwise() %>%
mutate_at(vars(FeatureID), factor)Is an ASV present only at the vent site? plume? or background? What about background and plume only?
library(purrr)
# Element-wise logical OR across the columns (or list elements) of
# `tax_asv_id`: TRUE wherever at least one column is TRUE.
# Seeding the fold with FALSE leaves the result unchanged for non-empty input
# and returns FALSE instead of raising an error when a selector matched no
# columns at all.
any_cols <- function(tax_asv_id) Reduce(`|`, tax_asv_id, FALSE)
asv_class <- tax_asv_id %>%
mutate(vent = ifelse(any_cols(across(contains("_Vent"), ~ . > 0)), "VENT", ""),
plume= ifelse(any_cols(across(contains("_Plume"), ~ . > 0)), "PLUME", ""),
bsw = ifelse(any_cols(across(contains("_Background"), ~ . > 0)), "BSW", ""),
) %>%
unite(class_tmp, vent, plume, bsw, sep = "_", na.rm = TRUE) %>%
mutate(CLASS = case_when(
class_tmp == "VENT__" ~ "Vent only",
class_tmp == "_PLUME_" ~ "Plume only",
class_tmp == "__BSW" ~ "Background only",
class_tmp == "VENT__BSW" ~ "Vent & background",
class_tmp == "VENT_PLUME_BSW" ~ "Vent, plume, & background",
class_tmp == "VENT_PLUME_" ~ "Vent & plume",
class_tmp == "_PLUME_BSW" ~ "Plume & background"
)) %>%
select(FeatureID, CLASS) %>% distinct()
colnames(tax_asv_id)Binary data frame with 1 indicating presence of ASV (rows) in a given sample (columns)
Depending on prevalence of ASV, assign groupings of location.
# Classify every ASV by the vent fields (sites) in which it was detected.
# One flag per site: the site code when the ASV has a value > 0 in any sample
# column whose name contains the site name, "" otherwise. The four flags are
# pasted together with "_" (order: Piccard, Von Damm, Axial, Gorda Ridge) and
# the resulting code is translated into a readable category; codes that are
# not listed map to NA.
# (An earlier variant, kept out of the code, pooled Piccard and Von Damm into
# a single "MCR" flag.)
asv_class_SITE <- tax_asv_id %>%
  mutate(
    picc = ifelse(any_cols(across(contains("Piccard"), ~ .x > 0)), "Picc", ""),
    vd = ifelse(any_cols(across(contains("VonDamm"), ~ .x > 0)), "VD", ""),
    axial = ifelse(any_cols(across(contains("Axial"), ~ .x > 0)), "AxS", ""),
    gr = ifelse(any_cols(across(contains("Gorda"), ~ .x > 0)), "GR", "")
  ) %>%
  unite(class_tmp, picc, vd, axial, gr, sep = "_", na.rm = TRUE) %>%
  mutate(
    SITE_CLASS = unname(c(
      "___GR" = "Gorda Ridge only",
      "__AxS_" = "Axial only",
      "_VD__" = "Von Damm only",
      "Picc_VD__" = "Piccard & Von Damm",
      "Picc___" = "Piccard only",
      "Picc_VD_AxS_" = "MCR & Axial",
      "__AxS_GR" = "Axial & Gorda Ridge",
      "_VD__GR" = "Von Damm & Gorda Ridge",
      "_VD_AxS_GR" = "Von Damm, Axial, & Gorda Ridge",
      "_VD_AxS_" = "Von Damm & Axial",
      "Picc_VD__GR" = "MCR & Gorda Ridge",
      "Picc__AxS_GR" = "Piccard, Axial, & Gorda Ridge",
      "Picc___GR" = "Piccard & Gorda Ridge",
      "Picc__AxS_" = "Piccard & Axial",
      "Picc_VD_AxS_GR" = "All sites"
    )[class_tmp])
  ) %>%
  distinct(FeatureID, SITE_CLASS)
# View(select(asv_class_SITE, SITE_CLASS) %>% distinct())Combine together with original ASV table
# Attach the sample-type category (CLASS) and the site category (SITE_CLASS)
# to every row of the QC'd long-format ASV table. The join key is stated
# explicitly so the joins cannot silently pick up additional shared columns
# and do not emit a "Joining, by = ..." message.
insitu_asv_wClass <- asv_insitu_qc %>%
  left_join(asv_class, by = "FeatureID") %>%
  left_join(asv_class_SITE, by = "FeatureID")
# head(insitu_asv_wClass)
Visualize the total number of ASVs in vent, plume, versus background.
# head(asv_insitu_qc)
# svg("bubbles.svg", h = 4, w = 8)
# Bubble plot: one bubble per dataset x sample type, sized by the number of
# distinct ASVs (FeatureID) observed in that combination.
asv_insitu_qc %>%
  distinct(DATASET, SAMPLETYPE, FeatureID) %>%
  count(DATASET, SAMPLETYPE, name = "COUNT") %>%
  ggplot(aes(x = DATASET, y = SAMPLETYPE, fill = SAMPLETYPE, size = COUNT)) +
  geom_point(shape = 21, color = "black") +
  scale_size_continuous(range = c(4, 20)) +
  scale_fill_viridis_d(option = "B") +
  theme_void() +
  theme(
    legend.position = "right",
    axis.text = element_text(color = "black")
  )
# dev.off()
Bubble plot reporting the total number of ASVs found in the vent, plume, versus background. At each site, the vent protist population had a higher total number of ASVs.
Repeat visualization by ASV distribution category.
# head(insitu_asv_wClass)
# Same bubble plot as above, but the distinct-ASV count is additionally split
# by the distribution category (CLASS); each sample type x CLASS combination
# gets its own facet row.
insitu_asv_wClass %>%
  distinct(DATASET, SAMPLETYPE, CLASS, FeatureID) %>%
  count(DATASET, SAMPLETYPE, CLASS, name = "COUNT") %>%
  ggplot(aes(x = DATASET, y = SAMPLETYPE, fill = SAMPLETYPE, size = COUNT)) +
  geom_point(shape = 21, color = "black") +
  scale_size_continuous(range = c(4, 20)) +
  scale_fill_viridis_d(option = "B") +
  facet_grid(SAMPLETYPE + CLASS ~ ., scales = "free", space = "free") +
  labs(
    x = "", y = "",
    title = "Total number of ASVs by distribution & sample type"
  ) +
  theme_void() +
  theme(
    legend.position = "right",
    axis.text.x = element_text(color = "black"),
    axis.title.y = element_blank()
  )
# Repeated bubble plot reports the total number of ASVs in the vent, plume,
# and background - but now further separated by distribution (i.e., if an ASV
# was found only in the vent and plume = “Vent & plume”). The largest portion
# of ASVs were found only at the vent sites (Vent only).
Categories for ASV distribution:
# Inspect the distribution categories, the number of distinct ASVs, and the
# sum of the `value` column before checkpointing.
unique(insitu_asv_wClass$CLASS)
unique(insitu_asv_wClass$SITE_CLASS)
length(unique(insitu_asv_wClass$FeatureID))
sum(insitu_asv_wClass$value)

# Checkpoint to save working dataframes.
save(asv_insitu, asv_insitu_qc, insitu_asv_wClass,
     file = "asv-tables-processed-18102021.RData")
# To explore microbial eukaryotic community diversity at all three sites, the
# functions below have been written to pass 18S data for each site through the
# same analysis. This will be done for all sites together and for them
# individually.
Sections below highlight Axial Seamount, Mid-Cayman Rise, and Gorda Ridge data individually.
# SITE values used to subset the ASV table in the per-region sections below.
axial <- "Axial"
mcr <- c("VonDamm", "Piccard")
gr <- "GordaRidge"
# Every site, in the order Axial, Von Damm, Piccard, Gorda Ridge.
all <- c(axial, mcr, gr)

# Restore the checkpointed tables written by save() above.
load(file = "asv-tables-processed-18102021.RData", verbose = TRUE)
## Loading objects:
## asv_insitu
## asv_insitu_qc
## insitu_asv_wClass
Create a bar plot showing the relative sequence abundance of 18S results to the Supergroup and Phylum level. Function averages across replicates and then sums to the phylum and supergroup level. Bar plot shows the relative sequence abundance.
# Draw stacked relative-abundance bar plots of the sequence counts at the
# Supergroup level (top panel) and the Supergroup-Phylum level (bottom panel).
#
# Args:
#   df:        long-format ASV table. The code reads the columns SITE, Domain,
#              value, FeatureID, Taxon, the taxonomy ranks Supergroup through
#              Species, VENT, SAMPLETYPE, YEAR and DATASET.
#   selection: character vector of SITE values to keep.
#
# Returns: both ggplot objects combined with `+` and laid out in one column
#   via patchwork::plot_layout().
make_bar_relabun <- function(df, selection) {
  # Manual palettes. Each base palette is extended by repeating its colours so
  # that more fill levels than base colours can still be drawn.
  supergroup_palette <- c(
    "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c",
    "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f",
    "#bdbdbd", "#252525"
  )
  phylum_palette <- c(
    "#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
    "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476",
    "#238b45", "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe",
    "#016c59", "#bcbddc", "#807dba", "#54278f", "#bdbdbd", "black",
    "white", "#969696", "#525252"
  )

  df_out <- df %>%
    filter(SITE %in% selection) %>%
    filter(Domain == "Eukaryota") %>%
    # Relabel unassigned ranks, rename the Alveolata_X phylum, and split the
    # Alveolata supergroup by phylum (e.g. "Alveolata-Ciliophora").
    mutate(
      Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
      Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
      Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
      Supergroup = ifelse(Supergroup == "Alveolata",
                          paste(Supergroup, Phylum, sep = "-"), Supergroup)
    ) %>%
    # Average across replicates: mean value per ASV within each sample group.
    group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order,
             Family, Genus, Species, VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>%
    summarise(SEQ_AVG_REP = mean(value), .groups = "drop")

  # Layers shared by both panels. position = "fill" rescales every bar to 1,
  # which is what turns the summed counts into relative abundance.
  shared_layers <- list(
    geom_bar(stat = "identity", position = "fill", color = "black",
             width = 0.9),
    facet_grid(. ~ SITE + YEAR + SAMPLETYPE, scales = "free", space = "free"),
    theme_linedraw(),
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
      strip.background = element_blank(),
      strip.text = element_text(color = "black"),
      legend.position = "right"
    ),
    scale_y_continuous(expand = c(0, 0)),
    labs(x = "", y = "Relative abundance")
  )

  supergroup <- df_out %>%
    group_by(SITE, SAMPLETYPE, VENT, YEAR, Supergroup) %>%
    summarise(SEQ_SUM = sum(SEQ_AVG_REP), .groups = "drop") %>%
    ggplot(aes(x = VENT, y = SEQ_SUM, fill = Supergroup)) +
    shared_layers +
    scale_fill_manual(values = rep(supergroup_palette, 2))

  phylum <- df_out %>%
    unite(SupergroupPhylum, Supergroup, Phylum, sep = "-") %>%
    group_by(SITE, SAMPLETYPE, VENT, YEAR, SupergroupPhylum) %>%
    summarise(SEQ_SUM = sum(SEQ_AVG_REP), .groups = "drop") %>%
    ggplot(aes(x = VENT, y = SEQ_SUM, fill = SupergroupPhylum)) +
    shared_layers +
    scale_fill_manual(
      values = c(phylum_palette, phylum_palette[seq_len(25)])
    )

  supergroup + phylum + patchwork::plot_layout(ncol = 1)
}
# make_bar_relabun(insitu_asv_wClass, axial)
Relative abundance plots are misleading, as this tag-sequence data is compositional. To combat this, we can also perform a center log-ratio (CLR) transformation of the sequence counts. This tile plot (or heat map) will show each value relative to the data mean. Positive values thus demonstrate an increase in the taxa, while negative values illustrate the opposite.
Ahead of the CLR transformation, average across replicates, then sum to the Class level. THEN perform CLR transformation and plot as heat map.
# Heat map (tile plot) of CLR-transformed sequence counts at the Class level.
#
# Args:
#   df:        long-format ASV table. The code reads SITE, Domain, value,
#              FeatureID, SAMPLENAME, VENT and the taxonomy ranks Supergroup
#              through Species.
#   selection: character vector of SITE values to keep.
#
# Returns: a ggplot object with samples on the x axis, Class on the y axis,
#   fill = CLR value, faceted by Supergroup + Phylum (rows) and SAMPLETYPE
#   (columns).
#
# Relies on the global `mcr` (SITE values treated as Mid-Cayman Rise).
make_clr_trans_tile <- function(df, selection) {
  df_wide <- df %>%
    filter(SITE %in% selection) %>%
    filter(Domain == "Eukaryota") %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, Domain, Supergroup, Phylum, Class,
             Order, Family, Genus, Species) %>%
    summarise(AVG = mean(value), .groups = "drop") %>%
    # Sum to the Class taxonomic level, one column per sample/vent
    unite(SAMPLENAME_2, SAMPLENAME, VENT, sep = "_") %>%
    group_by(SAMPLENAME_2, Supergroup, Phylum, Class) %>%
    summarise(CLASS_SUM = sum(AVG), .groups = "drop_last") %>%
    unite(CLASS, Supergroup, Phylum, Class, sep = " ") %>%
    select(CLASS, SAMPLENAME_2, CLASS_SUM) %>%
    pivot_wider(names_from = SAMPLENAME_2, values_from = CLASS_SUM,
                values_fill = 0) %>%
    column_to_rownames(var = "CLASS")

  # CLR transform the wide table, pivot back to long, and plot.
  # NOTE(review): clr() receives taxa as rows and samples as columns here,
  # whereas make_pca() passes samples as rows - confirm this orientation is
  # the intended one.
  data.frame(compositions::clr(df_wide)) %>%
    rownames_to_column(var = "CLASS") %>%
    # Every column except CLASS is a sample; selecting by exclusion avoids
    # depending on the sample names starting with a value from `selection`.
    pivot_longer(cols = -CLASS, values_to = "CLR",
                 names_to = "SAMPLENAME_2") %>%
    separate(SAMPLENAME_2, c("SAMPLENAME", "VENT"), sep = "_") %>%
    # Only the first four pieces of SAMPLENAME are needed; extra = "drop"
    # discards the rest without the "Expected 4 pieces" warning.
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
             remove = TRUE, extra = "drop") %>%
    # data.frame() replaced characters that are invalid in column names with
    # "."; turn the dots in the vent name back into spaces.
    mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial"
    )) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ VENT,
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    )) %>%
    select(-Sample_tmp) %>%
    unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
    separate(CLASS, c("Supergroup", "Phylum", "Class"), sep = " ",
             remove = FALSE) %>%
    ggplot(aes(x = SAMPLE, y = Class, fill = CLR)) +
    geom_tile(color = "#252525") +
    theme(
      legend.position = "right",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5,
                                 color = "black", size = 8),
      axis.text.y = element_text(color = "black", size = 8),
      strip.background = element_blank(),
      strip.text.y = element_text(hjust = 0, vjust = 0.5, angle = 0),
      legend.title = element_blank()
    ) +
    labs(x = "", y = "") +
    scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027",
                         na.value = "grey50") +
    facet_grid(Supergroup + Phylum ~ SAMPLETYPE, space = "free",
               scales = "free")
}
# Similar to above, the first step in the next function (make_pca) transforms
# data using CLR (to ASV level though). First plot will show eigenvalues
# (scree plot to determine if 2 vs. 3 dimensions is best for data). Then the
# function extracts data points and creates a PCA plot.
# PCA of CLR-transformed, ASV-level sequence counts.
#
# Args:
#   df:        long-format ASV table. The code reads SITE, Domain, value,
#              FeatureID, SAMPLENAME and VENT.
#   selection: character vector of SITE values to keep.
#
# Side effect: draws a base-graphics scree plot (percent of variance per PC).
# Returns: a ggplot object of PC1 vs PC2, shape = SAMPLETYPE, fill = VENTNAME.
#
# Relies on the global `mcr` (SITE values treated as Mid-Cayman Rise).
make_pca <- function(df, selection) {
  df_wide_asv <- df %>%
    filter(SITE %in% selection) %>%
    filter(Domain == "Eukaryota") %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>%
    summarise(AVG = mean(value), .groups = "drop") %>%
    # Only the first four pieces of SAMPLENAME are needed; extra = "drop"
    # discards the rest without the "Expected 4 pieces" warning.
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
             remove = TRUE, extra = "drop") %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial"
    )) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ VENT,
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    )) %>%
    select(-Sample_tmp) %>%
    # One row per sample; the "_"-joined key is split again after the PCA.
    unite(SAMPLE, SAMPLETYPE, REGION, VENTNAME, sep = "_", remove = FALSE) %>%
    group_by(FeatureID, SAMPLE) %>%
    summarise(SUM = sum(AVG), .groups = "drop_last") %>%
    pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
    column_to_rownames(var = "SAMPLE")

  # PCA on the CLR-transformed table (samples as rows, ASVs as columns)
  pca_lr <- prcomp(data.frame(compositions::clr(df_wide_asv)))
  # Proportion of total variance explained by each principal component
  variance_lr <- (pca_lr$sdev^2) / sum(pca_lr$sdev^2)

  ## Scree plot; scaled to percent so the bars match the "% Variance" label
  barplot(variance_lr * 100, main = "Log-Ratio PCA Screeplot",
          xlab = "PC Axis", ylab = "% Variance",
          cex.names = 1.5, cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)

  ## Extract PCA points
  data.frame(pca_lr$x, SAMPLE = rownames(pca_lr$x)) %>%
    separate(SAMPLE, c("SAMPLETYPE", "REGION", "VENTNAME"), sep = "_",
             remove = FALSE) %>%
    ## Generate PCA plot
    ggplot(aes(x = PC1, y = PC2, shape = SAMPLETYPE, fill = VENTNAME)) +
    geom_hline(yintercept = 0) +
    geom_vline(xintercept = 0, color = "#525252") +
    geom_point(size = 4, stroke = 1, aes(fill = VENTNAME)) +
    ylab(paste0('PC2 ', round(variance_lr[2] * 100, 2), '%')) +
    xlab(paste0('PC1 ', round(variance_lr[1] * 100, 2), '%')) +
    scale_shape_manual(values = c(21, 23, 24)) +
    scale_fill_viridis(discrete = TRUE, option = "turbo") +
    theme_bw() +
    theme(
      axis.text = element_text(color = "black", size = 12),
      legend.title = element_blank(),
      axis.title = element_text(color = "black", size = 14),
      legend.text = element_text(color = "black", size = 14),
      plot.margin = margin(2, 1, 2, 1, "cm")
    ) +
    # Shapes 21/23/24 are fillable; override the legend keys so the fill
    # legend shows colours and the shape legend shows solid black symbols.
    guides(
      fill = guide_legend(override.aes = list(shape = 21)),
      shape = guide_legend(override.aes = list(fill = "black"))
    )
}
# From complete dataset, average across replicates, then sum the total number
# of ASVs in each sample. Then plot a data point for total number of ASVs (ASV
# richness) by sample type - where sample type represents the vent, plume, vs.
# background. Box plots show the median and range.
# ASV richness (number of ASV rows per sample) by sample type, drawn as box
# plots with one jittered point per sample.
#
# Args:
#   df:        long-format ASV table. The code reads SITE, Domain, value,
#              FeatureID, SAMPLENAME, VENT and Supergroup.
#   selection: character vector of SITE values to keep.
#
# Returns: a ggplot object (x = SAMPLETYPE, y = NUM_ASV).
#
# Relies on the global `mcr` (SITE values treated as Mid-Cayman Rise).
# NOTE(review): richness is the row count per sample, so an ASV stored with an
# average of 0 would still be counted - confirm the input has no zero rows.
make_asv_rich <- function(df, selection) {
  df %>%
    filter(SITE %in% selection) %>%
    filter(Domain == "Eukaryota") %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
    summarise(AVG = mean(value), .groups = "drop_last") %>%
    # Only the first four pieces of SAMPLENAME are needed; extra = "drop"
    # discards the rest without the "Expected 4 pieces" warning.
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
             remove = TRUE, extra = "drop") %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial"
    )) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ VENT,
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    )) %>%
    select(-Sample_tmp) %>%
    unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
    ungroup() %>%
    # One row per sample: count the ASV rows it contains
    group_by(SITE, REGION, SAMPLE, SAMPLETYPE) %>%
    summarise(NUM_ASV = n(), .groups = "drop_last") %>%
    ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
    geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
    geom_jitter(size = 2, aes(fill = SITE)) +
    scale_shape_manual(values = c(21, 23, 24)) +
    theme_bw() +
    theme(
      axis.text = element_text(color = "black", size = 12),
      legend.title = element_blank(),
      axis.title = element_text(color = "black", size = 14),
      legend.text = element_text(color = "black", size = 14)
    ) +
    guides(
      fill = guide_legend(override.aes = list(shape = 21)),
      shape = guide_legend(override.aes = list(fill = "black"))
    ) +
    labs(x = "", y = "Total number of ASVs")
}
# Bar plot (colors correspond to Supergroup) represents the number of ASVs
# shared or unique to each sample. Combination matrix below bars shows which
# samples are considered for the bar plot.
# UpSet-style bar plot of the number of ASVs shared among, or unique to,
# samples; bars are coloured by Supergroup.
#
# Args:
#   df:        long-format ASV table. The code reads SITE, Domain, value,
#              FeatureID, SAMPLENAME, VENT and Supergroup.
#   selection: character vector of SITE values to keep.
#
# Returns: a ggplot object whose x axis is the set of samples each ASV occurs
#   in (scale_x_upset(), limited to 35 intersections).
#
# Relies on the global `mcr` (SITE values treated as Mid-Cayman Rise).
make_upset_plot <- function(df, selection) {
  df %>%
    filter(SITE %in% selection) %>%
    filter(Domain == "Eukaryota") %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
    summarise(AVG = mean(value), .groups = "drop_last") %>%
    # Only the first four pieces of SAMPLENAME are needed; extra = "drop"
    # discards the rest without the "Expected 4 pieces" warning.
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
             remove = TRUE, extra = "drop") %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial"
    )) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ VENT,
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    )) %>%
    select(-Sample_tmp) %>%
    unite(SAMPLE, SITE, VENTNAME, sep = " ", remove = FALSE) %>%
    distinct(FeatureID, Supergroup, AVG, SAMPLE, .keep_all = TRUE) %>%
    # Collapse to one row per ASV holding the list of samples it occurs in;
    # scale_x_upset() needs a list column on the x aesthetic.
    group_by(FeatureID, Supergroup) %>%
    summarise(SAMPLE = list(SAMPLE), .groups = "drop_last") %>%
    ggplot(aes(x = SAMPLE)) +
    geom_bar(color = "black", width = 0.5, aes(fill = Supergroup)) +
    scale_x_upset(n_intersections = 35) +
    scale_y_continuous(expand = c(0, 0)) +
    labs(x = "", y = "Shared ASVs") +
    theme_linedraw() +
    theme(
      axis.text = element_text(color = "black", size = 10),
      axis.title = element_text(color = "black", size = 10),
      legend.text = element_text(color = "black", size = 10),
      plot.margin = margin(1, 1, 1, 5, "cm")
    ) +
    scale_fill_manual(values = c(
      "#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
      "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476",
      "#238b45", "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe",
      "#016c59", "#bcbddc", "#807dba", "#54278f", "#bdbdbd", "black"
    ))
}
# Isolate the top 15 taxa
# head(insitu_asv_wClass)
# All distribution categories present in the data; handy as the `category`
# argument of top15() when no category filter is wanted.
all_class <- as.character(unique(insitu_asv_wClass$CLASS))
all_class_site <- as.character(unique(insitu_asv_wClass$SITE_CLASS))

# Interactive bar chart of the top-N ASVs (by replicate-averaged value) in
# each sample, counted per taxonomic group.
#
# Args:
#   df:       long-format ASV table with taxonomy, sample columns and the
#             category columns CLASS / SITE_CLASS.
#   NUM:      number of top ASVs to keep per sample (top_n() keeps ties).
#   site:     character vector of SITE values to keep.
#   level:    unquoted taxonomy column to filter on (e.g. Phylum).
#   taxa:     value that `level` must equal (e.g. "Ciliophora").
#   class:    unquoted category column (CLASS or SITE_CLASS).
#   category: character vector of values of `class` to keep.
#   plot_tax: unquoted taxonomy column used for the bar fill.
#
# Returns: a plotly object built from the ggplot bar chart.
top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax) {
  level <- enquo(level)
  class <- enquo(class)
  plot_tax <- enquo(plot_tax)

  out_table <- df %>%
    # Honour the `site` argument; previously this filter was commented out,
    # so every call returned all sites regardless of what was passed.
    filter(SITE %in% !!site) %>%
    filter(!!level == taxa) %>%
    filter(!!class %in% category) %>%
    # Average across replicates
    group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order,
             Family, Genus, Species, VENT, SITE, SAMPLETYPE, YEAR, DATASET,
             !!class) %>%
    summarise(SEQ_AVG_REP = mean(value), .groups = "drop") %>%
    # Select the top NUM ASVs within each sample
    group_by(VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>%
    top_n(NUM, wt = SEQ_AVG_REP) %>%
    unite(SAMPLENAME, SITE, SAMPLETYPE, VENT, YEAR, sep = " ",
          remove = FALSE)

  # Bars count ASVs (rows) per sample, stacked by the chosen taxonomic rank.
  plot <- ggplot(out_table, aes(x = SAMPLENAME, fill = !!plot_tax)) +
    geom_bar(stat = "count", color = "black", width = 0.7) +
    coord_flip() +
    facet_grid(SAMPLETYPE + SITE ~ ., space = "free", scales = "free") +
    theme_classic() +
    scale_fill_viridis(discrete = TRUE, option = "turbo")

  plotly::ggplotly(plot)
}
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# top15(insitu_asv_wClass, 10, all, Phylum, "Ciliophora", CLASS, all_class, Class)
# all_class
make_bar_relabun(insitu_asv_wClass, axial)
Axial Seamount samples from archived material span 2013, 2014, and 2015. First, the background and plume (from 2015 only, and from the plume associated with the Anemone vent) are different from the vent samples - they are overwhelmingly stramenopile and rhizaria. For the background and plume, the stramenopiles appear to be associated with ochrophyta or opalozoa. For the plume, the rhizaria population was associated with cercozoa, while in the background seawater it was identified as belonging to radiolaria.
The major difference between the background/plume and vent sites was the higher relative sequence abundance of ciliates and opisthokonta. For the opisthokonta, these are primarily metazoa - and I will need to investigate this further. Exceptions to this include the ‘Dependable’ vent from 2013, which had a completely different composition, and ‘Marker 113’ in 2015, in which the opisthokonta sequences were assigned to choanoflagellates and fungi.
Further questions to consider
Any geochemical changes to Marker 113 from 2013/2014 to 2015? Could attribute difference of opisthokonta colonization.
make_clr_trans_tile(insitu_asv_wClass, axial)## Warning: Expected 4 pieces. Additional pieces discarded in 1872 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Tile plot goes to the Class taxonomic level. Here at Axial, mostly the ciliate class had higher CLR values (more enriched relative to the data mean). Second to ciliates were cercozoa. Also noticing how Marker 113 2013 and 2015 are more similar to each other than 2014?
make_pca(insitu_asv_wClass, axial)## Warning: Expected 4 pieces. Additional pieces discarded in 7717 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
While we only have 1 plume and bsw each for Axial, they are grouping together - away from vents. So that is an expected signature and likely consistent with the other sites. These colors are a little confusing, it does look like Boca is an outlier.
make_asv_rich(insitu_asv_wClass, axial)## Warning: Expected 4 pieces. Additional pieces discarded in 7717 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
We only have 1 sample each for background and plume from Axial Seamount. But this shows that the vent sites have varied ASV richness.
make_upset_plot(insitu_asv_wClass, axial)## Warning: Expected 4 pieces. Additional pieces discarded in 7717 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 876 rows containing non-finite values (stat_count).
### Presence-absence at Axial - year and site
# Alveolata sub-groups (other than those kept separate) that are pooled into
# "Other Alveolata" below.
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)

# Per-ASV totals at Axial for every sample type x year combination (YR_LOC),
# labelled with a simplified supergroup/phylum name. Opisthokonta are
# excluded.
axial_loc_yr <- insitu_asv_wClass %>%
  filter(SITE %in% axial) %>%
  filter(Domain == "Eukaryota") %>%
  # Keep rows with an unassigned (NA) Supergroup: a bare `!=` comparison
  # yields NA for them and filter() would drop them before they can be
  # relabelled "Unknown Eukaryota" in the next step.
  filter(is.na(Supergroup) | Supergroup != "Opisthokonta") %>%
  mutate(
    Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
    Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
    Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
    Supergroup = ifelse(Supergroup == "Alveolata",
                        paste(Supergroup, Phylum, sep = "-"), Supergroup)
  ) %>%
  # First matching rule wins; anything unmatched keeps its Supergroup label.
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>%
  summarise(AVG = mean(value), .groups = "drop") %>%
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial"
  )) %>%
  select(-Sample_tmp) %>%
  filter(REGION == "Axial") %>%
  # Sum each ASV over all vents sharing a sample type and year
  unite(YR_LOC, SAMPLETYPE, YEAR, sep = " ", remove = FALSE) %>%
  group_by(FeatureID, SupergroupPhylum, YR_LOC) %>%
  summarise(SUM = sum(AVG), .groups = "drop")
# filter(SUM > 200) was tried here and left disabled.
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
##
# UpSet-style bar plot for Axial: number of ASVs shared among, or unique to,
# the sample type x year combinations, coloured by supergroup/phylum.
axial_loc_yr %>%
  # axial_loc_yr holds exactly these four columns, so this is row-level
  # de-duplication.
  distinct() %>%
  group_by(FeatureID, SupergroupPhylum) %>%
  # One row per ASV with the list of YR_LOC sets it occurs in
  summarise(SAMPLE = list(YR_LOC)) %>%
  ggplot(aes(x = SAMPLE, fill = SupergroupPhylum)) +
  geom_bar(color = "black", width = 0.5) +
  scale_x_upset(n_intersections = 25) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = c(
    "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c",
    "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45",
    "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d",
    "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494",
    "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525"
  )) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(
    axis.text.y = element_text(color = "black", size = 14),
    axis.text.x = element_text(color = "black", size = 14),
    axis.title = element_text(color = "black", size = 14),
    legend.text = element_text(color = "black", size = 12),
    plot.margin = margin(1, 1, 1, 5, "cm")
  )
## Warning: Removed 13 rows containing non-finite values (stat_count).
# dev.off()
Extract the total number of ASVs from the Axial data, and those that were found in the vent fluid in every sampled year.
length(unique(axial_loc_yr$FeatureID))## [1] 3586
# ASVs detected in the vent samples of every year: spread the vent totals to
# one column per year (absent combinations become NA) and keep complete rows.
tmp <- axial_loc_yr %>%
  filter(str_detect(YR_LOC, "Vent")) %>%
  pivot_wider(names_from = YR_LOC, values_from = SUM) %>%
  drop_na()
head(tmp) ## # A tibble: 6 × 5
## FeatureID SupergroupPhylum `Vent 2013` `Vent 2014` `Vent 2015`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 00b72d1a5fefb03bc39e… Alveolata-Ciliophora 11403 1817 721
## 2 01a71ee728b1597b04d3… Stramenopiles 368 51 224
## 3 04f5a1d4ab104eeb7457… Stramenopiles 273 120 200
## 4 05b6d079805b2bb389fe… Stramenopiles-Sagen… 133 50 10
## 5 068cf0d76352f3978c0e… Stramenopiles 988 40 172
## 6 06cb67e2036452e926aa… Alveolata-Dinoflage… 544 240 140
length(unique(tmp$FeatureID))## [1] 177
# Stacked bars of the summed values of the shared vent ASVs, per year and
# supergroup/phylum.
tmp %>%
  pivot_longer(starts_with("Vent "), names_to = "name", values_to = "value") %>%
  group_by(name, SupergroupPhylum) %>%
  summarise(VALUE = sum(value)) %>%
  ggplot(aes(x = name, y = VALUE, fill = SupergroupPhylum)) +
  geom_col()
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# Top 15 ASVs per Axial sample across all distribution categories, by phylum
top15(insitu_asv_wClass, 15, axial, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
vent_only <- c("Vent only")
# Isolate top 15 vent-only ASVs from axial - at the phylum level
top15(insitu_asv_wClass, 15, axial, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, axial, Phylum, "Ciliophora", CLASS, vent_only, Class)

# Mid-Cayman Rise: same analyses as for Axial
make_bar_relabun(insitu_asv_wClass, mcr)
make_clr_trans_tile(insitu_asv_wClass, mcr)
## Warning: Expected 4 pieces. Additional pieces discarded in 1794 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
make_pca(insitu_asv_wClass, mcr)## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
make_asv_rich(insitu_asv_wClass, mcr)## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
make_upset_plot(insitu_asv_wClass, mcr)## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 907 rows containing non-finite values (stat_count).
Repeat presence-absence plot, but with a lower resolution.
# Lower-resolution UpSet plot for the Mid-Cayman Rise: ASVs are pooled per
# site x sample type instead of per individual vent.
insitu_asv_wClass %>%
  filter(SITE %in% mcr, Domain == "Eukaryota") %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
  summarise(AVG = mean(value)) %>%
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
  # Set label = site + sample type (e.g. one set per site's vents)
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
  group_by(FeatureID, Supergroup, SAMPLE) %>%
  summarise(SUM = sum(AVG)) %>%
  ungroup() %>%
  # Only FeatureID, Supergroup, SAMPLE and SUM remain at this point
  distinct() %>%
  group_by(FeatureID, Supergroup) %>%
  summarise(SAMPLE = list(SAMPLE)) %>%
  ggplot(aes(x = SAMPLE, fill = Supergroup)) +
  geom_bar(color = "black", width = 0.5) +
  scale_x_upset(n_intersections = 15) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = c(
    "#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
    "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476",
    "#238b45", "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe",
    "#016c59", "#bcbddc", "#807dba", "#54278f", "#bdbdbd", "black"
  )) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(
    axis.text = element_text(color = "black", size = 10),
    axis.title = element_text(color = "black", size = 10),
    legend.text = element_text(color = "black", size = 10),
    plot.margin = margin(1, 1, 1, 5, "cm")
  )
## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 331 rows containing non-finite values (stat_count).
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# Top 15 ASVs per MCR sample across all distribution categories, by phylum
top15(insitu_asv_wClass, 15, mcr, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
vent_only <- c("Vent only")
# Isolate top 15 vent-only ASVs from MCR - at the phylum level
top15(insitu_asv_wClass, 15, mcr, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, mcr, Phylum, "Ciliophora", CLASS, vent_only, Class)

# Import the tab-delimited Mid-Cayman Rise table from the sibling project
import_mcr <- read_delim(file = "../../Mid-Cayman_Rise/midcayman-rise-microeuk/table-wcalc.txt", delim = "\t")
# head(import_mcr)
# mcr_metadata <- import_mcr %>%
# select(GRAZING_EFFECT_hr)
# unite(type_site, "2020", _vent_, " ")# View(asv_insitu_qc)
# unique(tmp$SAMPLENAME)
# One-row bubble plot of a per-sample summary statistic.
#
# Args:
#   VARIABLE:  which statistic to draw; one of
#                "SUM"       - total of `value` in the sample,
#                "ASV_COUNT" - number of distinct ASVs with value > 0,
#                "TEMP_avg"  - mean of TEMP,
#                "PROK_avg"  - mean of ProkConc.
#   df:        long-format ASV table (defaults to the global asv_insitu_qc).
#   selection: SITE values to keep (defaults to the global `mcr`).
#
# Returns: a ggplot object with one bubble per SAMPLENAME, sized by the
#   statistic.
plot_bubble <- function(VARIABLE, df = asv_insitu_qc, selection = mcr) {
  VARIABLE <- match.arg(VARIABLE, c("SUM", "ASV_COUNT", "TEMP_avg", "PROK_avg"))

  df %>%
    filter(SITE %in% !!selection) %>%
    filter(Domain == "Eukaryota") %>%
    filter(value > 0) %>%
    # Summarise each sample
    group_by(SAMPLENAME, VENT) %>%
    summarise(
      SUM = sum(value),
      ASV_COUNT = n_distinct(FeatureID),
      # The metadata columns are stored as text, so mean() on them returned
      # NA with a warning for every group; convert to numeric first.
      TEMP_avg = mean(as.numeric(as.character(TEMP)), na.rm = TRUE),
      PROK_avg = mean(as.numeric(as.character(ProkConc)), na.rm = TRUE),
      .groups = "drop"
    ) %>%
    pivot_longer(cols = c(SUM, ASV_COUNT, TEMP_avg, PROK_avg)) %>%
    filter(name == !!VARIABLE) %>%
    ggplot(aes(x = SAMPLENAME, y = name, size = value)) +
    geom_point(shape = 21, color = "black", aes(size = value)) +
    scale_size_continuous(range = c(1, 16)) +
    theme_void() +
    theme(
      axis.text.x = element_text(color = "black", angle = 45, hjust = 1,
                                 vjust = 1),
      axis.text.y = element_text(color = "black"),
      legend.title = element_blank()
    )
}
# plot_grid(
# plot_bubble("ASV_COUNT") + theme(axis.text.x = element_blank()),
# plot_bubble("SUM") + theme(axis.text.x = element_blank()),
# plot_bubble("PROK_avg") + theme(axis.text.x = element_blank()),
# plot_bubble("TEMP_avg"),
# ncol = 1,
# align = c("hv"),
# axis = c("lrtb")
# )
# Stack the four summary bubble rows in a single column. Only the bottom
# panel keeps its x-axis (sample) labels; the panels above have them blanked.
(plot_bubble("ASV_COUNT") + theme(axis.text.x = element_blank())) +
  (plot_bubble("SUM") + theme(axis.text.x = element_blank())) +
  (plot_bubble("PROK_avg") + theme(axis.text.x = element_blank())) +
  plot_bubble("TEMP_avg") +
  patchwork::plot_layout(ncol = 1)
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(TEMP): argument is not numeric or logical: returning NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning in mean.default(ProkConc): argument is not numeric or logical: returning
## NA
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_point).
# ?plot_grid()
## Bar plot relative abundance: GR
# Run the plotting helpers on the `gr` site selection, one call per line.
# The helpers and `gr` are defined outside this excerpt. The `## Warning`
# lines are the rendered output that followed each call.
make_bar_relabun(insitu_asv_wClass, gr)
make_clr_trans_tile(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 2210 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
make_pca(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 9456 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
make_asv_rich(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 9456 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
make_upset_plot(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 9456 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 926 rows containing non-finite values (stat_count).
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(insitu_asv_wClass, 15, gr, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
vent_only <- c("Vent only")
# Isolate top 15 vent-only ASVs from GR - at the phylum level
top15(insitu_asv_wClass, 15, gr, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, gr, Phylum, "Ciliophora", CLASS, vent_only, Class)

# Site selections reused by the pipelines below.
# NOTE(review): `all` shadows base::all() as a variable; calls to all() still
# resolve to the function, but a more specific name would be clearer. It is
# kept here because later code filters with `SITE %in% all`.
all <- c("Axial", "VonDamm", "Piccard", "GordaRidge")
mcr <- c("VonDamm", "Piccard")
make_bar_relabun(insitu_asv_wClass, all)
### Tree map - simpler taxonomic composition
library(treemapify)
# unique(tmp$SUPERGROUP)
# Filter the data to reduce noise and to show how sample types vary across vent ecosystems.
# Alveolata sub-labels (built below as "Alveolata-<Phylum>") that are collapsed
# into "Other Alveolata".
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown",
         "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# VENT values labelled "Background" and "Plume"; every other VENT value is
# labelled "Vent".
bkgd <- c("Deep seawater", "BSW", "Shallow seawater")
plume <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Near vent BW")

# Sequence totals and row counts per site, supergroup label and sample category.
to_supergroup <- insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Keep rows whose Supergroup is missing: `NA != "Opisthokonta"` evaluates to
  # NA and filter() drops NA rows, which would discard them before the
  # "Unknown Eukaryota" relabelling below could run.
  filter(is.na(Supergroup) | Supergroup != "Opisthokonta") %>%
  mutate(
    Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
    Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
    Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
    # Alveolata are split by phylum so that individual phyla can be kept or
    # pooled in the case_when() below.
    Supergroup = ifelse(Supergroup == "Alveolata",
                        paste(Supergroup, Phylum, sep = "-"),
                        Supergroup)
  ) %>%
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>%
  # Average `value` within each ASV x sample grouping.
  group_by(FeatureID, Taxon, SUPERGROUP,
           VENT, SITE, SAMPLETYPE, YEAR, DATASET, SAMPLETYPEORDER) %>%
  summarise(SEQ_AVG_REP = mean(value)) %>%
  ungroup() %>%
  group_by(SITE, SUPERGROUP, SAMPLETYPEORDER) %>%
  # ASV_COUNT is the number of averaged rows in the group, so an ASV present
  # under several VENT/SAMPLETYPE/YEAR/DATASET combinations is counted once
  # per combination.
  summarise(ASV_COUNT = n(),
            SEQ_SUM = sum(SEQ_AVG_REP)) %>%
  # Return an ungrouped table so later steps do not inherit leftover grouping.
  ungroup()
# Order sample type
to_supergroup$SAMPLETYPEORDER <- factor(to_supergroup$SAMPLETYPEORDER,
                                        levels = c("Background", "Plume", "Vent"))
# View(to_supergroup)
# Treemap of summed sequence counts per supergroup label, one panel per site
# (rows) and sample category (columns). Only site x category x supergroup rows
# whose SEQ_SUM exceeds 200 are drawn.
ggplot(
  filter(to_supergroup, SEQ_SUM > 200),
  aes(area = SEQ_SUM, fill = SUPERGROUP, subgroup = SUPERGROUP)
) +
  geom_treemap(color = "white") +
  geom_treemap_subgroup_border(colour = "white", size = 2) +
  facet_grid(SITE ~ SAMPLETYPEORDER) +
  theme_linedraw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    strip.background = element_blank(),
    strip.text = element_text(color = "black"),
    legend.position = "right",
    legend.title = element_blank(),
    panel.border = element_blank()
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  # The same 14-colour palette is supplied twice, giving 28 fill values.
  scale_fill_manual(values = rep(
    c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4",
      "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525"),
    times = 2
  )) +
  labs(x = "", y = "Sequence proportion by Supergroup")
# head(to_supergroup)
# Tabulate each row's share of sequences relative to (1) the whole table,
# (2) its site and (3) its site x sample category, rendered with gt and
# grouped by site and sample category.
totalsum_supergroup <- sum(to_supergroup$SEQ_SUM)
to_supergroup %>%
  ungroup() %>%
  mutate(Perc_seq_total = (100 * (SEQ_SUM / totalsum_supergroup))) %>%
  group_by(SITE) %>%
  mutate(Perc_seq_site = (100 * (SEQ_SUM / sum(SEQ_SUM)))) %>%
  group_by(SITE, SAMPLETYPEORDER) %>%
  mutate(Perc_seq_site_type = (100 * (SEQ_SUM / sum(SEQ_SUM)))) %>%
  ungroup() %>%
  gt(groupname_col = c("SITE", "SAMPLETYPEORDER")) %>%
  # Percentages to two decimals; counts as whole numbers.
  fmt_number(columns = starts_with("Perc_"), decimals = 2) %>%
  fmt_number(columns = c(SEQ_SUM, ASV_COUNT), decimals = 0)
| SUPERGROUP | ASV_COUNT | SEQ_SUM | Perc_seq_total | Perc_seq_site | Perc_seq_site_type |
|---|---|---|---|---|---|
| Axial - Background | |||||
| Alveolata-Ciliophora | 82 | 6,564 | 0.25 | 1.24 | 14.83 |
| Alveolata-Dinoflagellata | 164 | 8,514 | 0.32 | 1.61 | 19.24 |
| Amoebozoa | 3 | 9 | 0.00 | 0.00 | 0.02 |
| Apusozoa | 1 | 20 | 0.00 | 0.00 | 0.05 |
| Archaeplastida | 5 | 275 | 0.01 | 0.05 | 0.62 |
| Hacrobia | 38 | 936 | 0.04 | 0.18 | 2.12 |
| Rhizaria-Cercozoa | 4 | 91 | 0.00 | 0.02 | 0.21 |
| Rhizaria-Radiolaria | 65 | 6,653 | 0.25 | 1.26 | 15.03 |
| Stramenopiles | 13 | 340 | 0.01 | 0.06 | 0.77 |
| Stramenopiles-Ochrophyta | 21 | 4,450 | 0.17 | 0.84 | 10.06 |
| Stramenopiles-Opalozoa | 28 | 16,103 | 0.61 | 3.05 | 36.39 |
| Stramenopiles-Sagenista | 14 | 300 | 0.01 | 0.06 | 0.68 |
| Axial - Vent | |||||
| Alveolata-Ciliophora | 1,771 | 279,739 | 10.54 | 53.00 | 57.85 |
| Alveolata-Dinoflagellata | 1,315 | 57,012 | 2.15 | 10.80 | 11.79 |
| Amoebozoa | 60 | 1,371 | 0.05 | 0.26 | 0.28 |
| Apusozoa | 101 | 3,186 | 0.12 | 0.60 | 0.66 |
| Archaeplastida | 70 | 1,342 | 0.05 | 0.25 | 0.28 |
| Excavata | 43 | 1,437 | 0.05 | 0.27 | 0.30 |
| Hacrobia | 453 | 16,363 | 0.62 | 3.10 | 3.38 |
| Other Alveolata | 48 | 765 | 0.03 | 0.14 | 0.16 |
| Protalveolata | 4 | 131 | 0.00 | 0.02 | 0.03 |
| Rhizaria-Cercozoa | 570 | 18,067 | 0.68 | 3.42 | 3.74 |
| Rhizaria-Radiolaria | 530 | 23,438 | 0.88 | 4.44 | 4.85 |
| Stramenopiles | 389 | 21,937 | 0.83 | 4.16 | 4.54 |
| Stramenopiles-Ochrophyta | 581 | 41,518 | 1.56 | 7.87 | 8.59 |
| Stramenopiles-Opalozoa | 224 | 8,756 | 0.33 | 1.66 | 1.81 |
| Stramenopiles-Sagenista | 259 | 8,520 | 0.32 | 1.61 | 1.76 |
| GordaRidge - Background | |||||
| Alveolata-Ciliophora | 228 | 16,972 | 0.64 | 4.19 | 28.17 |
| Alveolata-Dinoflagellata | 294 | 13,070 | 0.49 | 3.23 | 21.70 |
| Amoebozoa | 1 | 10 | 0.00 | 0.00 | 0.02 |
| Apusozoa | 2 | 24 | 0.00 | 0.01 | 0.04 |
| Archaeplastida | 18 | 724 | 0.03 | 0.18 | 1.20 |
| Excavata | 1 | 86 | 0.00 | 0.02 | 0.14 |
| Hacrobia | 95 | 4,340 | 0.16 | 1.07 | 7.20 |
| Other Alveolata | 1 | 5 | 0.00 | 0.00 | 0.01 |
| Rhizaria-Cercozoa | 11 | 204 | 0.01 | 0.05 | 0.34 |
| Rhizaria-Radiolaria | 128 | 5,016 | 0.19 | 1.24 | 8.33 |
| Stramenopiles | 10 | 178 | 0.01 | 0.04 | 0.30 |
| Stramenopiles-Ochrophyta | 86 | 4,911 | 0.19 | 1.21 | 8.15 |
| Stramenopiles-Opalozoa | 90 | 14,308 | 0.54 | 3.53 | 23.75 |
| Stramenopiles-Sagenista | 22 | 391 | 0.01 | 0.10 | 0.65 |
| GordaRidge - Plume | |||||
| Alveolata-Ciliophora | 319 | 17,438 | 0.66 | 4.31 | 13.73 |
| Alveolata-Dinoflagellata | 604 | 46,774 | 1.76 | 11.55 | 36.82 |
| Apusozoa | 6 | 294 | 0.01 | 0.07 | 0.23 |
| Archaeplastida | 30 | 1,866 | 0.07 | 0.46 | 1.47 |
| Excavata | 1 | 560 | 0.02 | 0.14 | 0.44 |
| Hacrobia | 89 | 4,617 | 0.17 | 1.14 | 3.63 |
| Other Alveolata | 7 | 61 | 0.00 | 0.02 | 0.05 |
| Rhizaria-Cercozoa | 32 | 958 | 0.04 | 0.24 | 0.75 |
| Rhizaria-Radiolaria | 272 | 32,645 | 1.23 | 8.06 | 25.70 |
| Stramenopiles | 22 | 442 | 0.02 | 0.11 | 0.35 |
| Stramenopiles-Ochrophyta | 138 | 6,294 | 0.24 | 1.55 | 4.96 |
| Stramenopiles-Opalozoa | 98 | 14,144 | 0.53 | 3.49 | 11.14 |
| Stramenopiles-Sagenista | 42 | 925 | 0.03 | 0.23 | 0.73 |
| GordaRidge - Vent | |||||
| Alveolata-Ciliophora | 837 | 82,810 | 3.12 | 20.45 | 38.06 |
| Alveolata-Dinoflagellata | 1,384 | 59,161 | 2.23 | 14.61 | 27.19 |
| Amoebozoa | 22 | 406 | 0.02 | 0.10 | 0.19 |
| Apusozoa | 24 | 496 | 0.02 | 0.12 | 0.23 |
| Archaeplastida | 50 | 2,252 | 0.08 | 0.56 | 1.03 |
| Excavata | 6 | 238 | 0.01 | 0.06 | 0.11 |
| Hacrobia | 295 | 10,004 | 0.38 | 2.47 | 4.60 |
| Other Alveolata | 28 | 682 | 0.03 | 0.17 | 0.31 |
| Rhizaria-Cercozoa | 232 | 6,690 | 0.25 | 1.65 | 3.07 |
| Rhizaria-Radiolaria | 498 | 19,181 | 0.72 | 4.74 | 8.82 |
| Stramenopiles | 108 | 5,144 | 0.19 | 1.27 | 2.36 |
| Stramenopiles-Ochrophyta | 329 | 11,665 | 0.44 | 2.88 | 5.36 |
| Stramenopiles-Opalozoa | 177 | 16,303 | 0.61 | 4.03 | 7.49 |
| Stramenopiles-Sagenista | 128 | 2,557 | 0.10 | 0.63 | 1.18 |
| Piccard - Background | |||||
| Alveolata-Ciliophora | 58 | 7,567 | 0.29 | 1.45 | 6.22 |
| Alveolata-Dinoflagellata | 238 | 38,855 | 1.46 | 7.46 | 31.93 |
| Amoebozoa | 1 | 22 | 0.00 | 0.00 | 0.02 |
| Apusozoa | 2 | 144 | 0.01 | 0.03 | 0.12 |
| Archaeplastida | 11 | 160 | 0.01 | 0.03 | 0.13 |
| Hacrobia | 65 | 4,671 | 0.18 | 0.90 | 3.84 |
| Other Alveolata | 2 | 197 | 0.01 | 0.04 | 0.16 |
| Rhizaria-Cercozoa | 17 | 4,743 | 0.18 | 0.91 | 3.90 |
| Rhizaria-Radiolaria | 81 | 26,658 | 1.00 | 5.12 | 21.91 |
| Stramenopiles | 10 | 411 | 0.02 | 0.08 | 0.34 |
| Stramenopiles-Ochrophyta | 28 | 24,358 | 0.92 | 4.68 | 20.02 |
| Stramenopiles-Opalozoa | 25 | 13,126 | 0.49 | 2.52 | 10.79 |
| Stramenopiles-Sagenista | 14 | 780 | 0.03 | 0.15 | 0.64 |
| Piccard - Plume | |||||
| Alveolata-Ciliophora | 68 | 26,322 | 0.99 | 5.05 | 31.18 |
| Alveolata-Dinoflagellata | 75 | 20,205 | 0.76 | 3.88 | 23.94 |
| Amoebozoa | 6 | 27 | 0.00 | 0.01 | 0.03 |
| Apusozoa | 1 | 240 | 0.01 | 0.05 | 0.28 |
| Hacrobia | 6 | 490 | 0.02 | 0.09 | 0.58 |
| Rhizaria-Cercozoa | 6 | 2,683 | 0.10 | 0.52 | 3.18 |
| Rhizaria-Radiolaria | 44 | 15,334 | 0.58 | 2.94 | 18.17 |
| Stramenopiles | 4 | 367 | 0.01 | 0.07 | 0.43 |
| Stramenopiles-Ochrophyta | 13 | 1,398 | 0.05 | 0.27 | 1.66 |
| Stramenopiles-Opalozoa | 49 | 16,330 | 0.62 | 3.14 | 19.35 |
| Stramenopiles-Sagenista | 15 | 1,018 | 0.04 | 0.20 | 1.21 |
| Piccard - Vent | |||||
| Alveolata-Ciliophora | 190 | 182,023 | 6.86 | 34.96 | 57.86 |
| Alveolata-Dinoflagellata | 290 | 46,719 | 1.76 | 8.97 | 14.85 |
| Amoebozoa | 10 | 595 | 0.02 | 0.11 | 0.19 |
| Apusozoa | 4 | 237 | 0.01 | 0.05 | 0.08 |
| Archaeplastida | 12 | 401 | 0.02 | 0.08 | 0.13 |
| Excavata | 3 | 206 | 0.01 | 0.04 | 0.07 |
| Hacrobia | 232 | 29,309 | 1.10 | 5.63 | 9.32 |
| Other Alveolata | 6 | 139 | 0.01 | 0.03 | 0.04 |
| Rhizaria-Cercozoa | 51 | 13,567 | 0.51 | 2.61 | 4.31 |
| Rhizaria-Radiolaria | 98 | 23,308 | 0.88 | 4.48 | 7.41 |
| Stramenopiles | 38 | 3,727 | 0.14 | 0.72 | 1.18 |
| Stramenopiles-Ochrophyta | 137 | 10,058 | 0.38 | 1.93 | 3.20 |
| Stramenopiles-Opalozoa | 44 | 2,051 | 0.08 | 0.39 | 0.65 |
| Stramenopiles-Sagenista | 59 | 2,274 | 0.09 | 0.44 | 0.72 |
| VonDamm - Background | |||||
| Alveolata-Ciliophora | 86 | 15,073 | 0.57 | 1.26 | 14.10 |
| Alveolata-Dinoflagellata | 261 | 26,214 | 0.99 | 2.18 | 24.53 |
| Apusozoa | 1 | 12 | 0.00 | 0.00 | 0.01 |
| Archaeplastida | 1 | 5 | 0.00 | 0.00 | 0.00 |
| Excavata | 1 | 5 | 0.00 | 0.00 | 0.00 |
| Hacrobia | 67 | 3,174 | 0.12 | 0.26 | 2.97 |
| Rhizaria-Cercozoa | 13 | 807 | 0.03 | 0.07 | 0.76 |
| Rhizaria-Radiolaria | 133 | 23,934 | 0.90 | 1.99 | 22.40 |
| Stramenopiles | 38 | 1,712 | 0.06 | 0.14 | 1.60 |
| Stramenopiles-Ochrophyta | 49 | 1,895 | 0.07 | 0.16 | 1.77 |
| Stramenopiles-Opalozoa | 75 | 32,063 | 1.21 | 2.67 | 30.00 |
| Stramenopiles-Sagenista | 37 | 1,975 | 0.07 | 0.16 | 1.85 |
| VonDamm - Plume | |||||
| Alveolata-Ciliophora | 79 | 9,747 | 0.37 | 0.81 | 6.83 |
| Alveolata-Dinoflagellata | 463 | 48,444 | 1.83 | 4.03 | 33.96 |
| Amoebozoa | 1 | 205 | 0.01 | 0.02 | 0.14 |
| Apusozoa | 1 | 30 | 0.00 | 0.00 | 0.02 |
| Archaeplastida | 2 | 10 | 0.00 | 0.00 | 0.01 |
| Hacrobia | 62 | 4,973 | 0.19 | 0.41 | 3.49 |
| Other Alveolata | 3 | 70 | 0.00 | 0.01 | 0.05 |
| Rhizaria-Cercozoa | 20 | 6,267 | 0.24 | 0.52 | 4.39 |
| Rhizaria-Radiolaria | 152 | 33,754 | 1.27 | 2.81 | 23.67 |
| Stramenopiles | 55 | 4,543 | 0.17 | 0.38 | 3.19 |
| Stramenopiles-Ochrophyta | 51 | 15,748 | 0.59 | 1.31 | 11.04 |
| Stramenopiles-Opalozoa | 83 | 11,298 | 0.43 | 0.94 | 7.92 |
| Stramenopiles-Sagenista | 58 | 7,542 | 0.28 | 0.63 | 5.29 |
| VonDamm - Vent | |||||
| Alveolata-Ciliophora | 501 | 441,050 | 16.62 | 36.73 | 46.36 |
| Alveolata-Dinoflagellata | 1,069 | 154,304 | 5.81 | 12.85 | 16.22 |
| Amoebozoa | 24 | 4,905 | 0.18 | 0.41 | 0.52 |
| Apusozoa | 17 | 481 | 0.02 | 0.04 | 0.05 |
| Archaeplastida | 39 | 2,061 | 0.08 | 0.17 | 0.22 |
| Excavata | 18 | 484 | 0.02 | 0.04 | 0.05 |
| Hacrobia | 657 | 92,844 | 3.50 | 7.73 | 9.76 |
| Other Alveolata | 23 | 1,969 | 0.07 | 0.16 | 0.21 |
| Rhizaria-Cercozoa | 213 | 14,901 | 0.56 | 1.24 | 1.57 |
| Rhizaria-Radiolaria | 396 | 115,832 | 4.36 | 9.65 | 12.18 |
| Stramenopiles | 192 | 30,478 | 1.15 | 2.54 | 3.20 |
| Stramenopiles-Ochrophyta | 339 | 55,996 | 2.11 | 4.66 | 5.89 |
| Stramenopiles-Opalozoa | 175 | 12,790 | 0.48 | 1.07 | 1.34 |
| Stramenopiles-Sagenista | 210 | 23,222 | 0.87 | 1.93 | 2.44 |
A better approach down below after isolating the vent-only ASVs.
# make_clr_trans_tile(insitu_asv_wClass, all)
# PCA across the four sites in `all`. The call sits on its own line so that it
# is not swallowed by the commented-out tile plot above.
make_pca(insitu_asv_wClass, all)
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Repeat, but color by Region and sample type.
# Build a wide table with one row per sample label (row names) and one column
# per ASV (FeatureID), filled with replicate-averaged values; absent ASVs are 0.
df_wide_asv <- insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT) %>%
  summarise(AVG = mean(value)) %>%
  ungroup() %>%
  # SAMPLENAME splits into more than four pieces; only the first four are
  # needed, so the surplus is dropped explicitly rather than triggering an
  # "Expected 4 pieces" warning for every row.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial"
  )) %>%
  # Vent label: the bare vent name at Gorda Ridge, site + vent at the
  # Mid-Cayman Rise sites, vent + year at Axial.
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  # SAMPLE = "<SAMPLETYPE>_<SITE>_<VENTNAME>"; later steps split it on "_".
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
  group_by(FeatureID, SAMPLE) %>%
  summarise(SUM = sum(AVG)) %>%
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
  column_to_rownames(var = "SAMPLE")
# look at eigenvalues
# PCA on the centred log-ratio transform of the sample x ASV table.
pca_lr <- prcomp(data.frame(compositions::clr(df_wide_asv)))
# Proportion (0-1) of total variance carried by each principal component.
variance_lr <- (pca_lr$sdev^2) / sum(pca_lr$sdev^2)
## View bar plot
# The proportions are multiplied by 100 so the bar heights match the
# "% Variance" axis label; `variance_lr` itself stays a proportion.
barplot(variance_lr * 100, main = "Log-Ratio PCA Screeplot", xlab = "PC Axis",
        ylab = "% Variance",
        cex.names = 1.5, cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)
## Extract PCA points
# Scatter of the first two principal components, shaped by sample type and
# filled by the second piece of the sample label. That piece is named REGION
# here, but it is the SITE value that was written into the row names when
# df_wide_asv was assembled.
data.frame(pca_lr$x, SAMPLE = rownames(pca_lr$x)) %>%
  separate(SAMPLE, c("SAMPLETYPE", "REGION", "VENTNAME"), sep = "_", remove = FALSE) %>%
  ## Generate PCA plot
  ggplot(aes(x = PC1, y = PC2, shape = SAMPLETYPE, fill = REGION)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0, color = "#525252") +
  # fill is inherited from the plot-level mapping
  geom_point(size = 3, stroke = 1) +
  # Axis titles carry the percentage of variance explained by each component.
  labs(
    x = paste0('PC1 ', round(variance_lr[1] * 100, 2), '%'),
    y = paste0('PC2 ', round(variance_lr[2] * 100, 2), '%')
  ) +
  scale_shape_manual(values = c(21, 23, 24)) +
  scale_fill_manual(values = c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")) +
  theme_bw() +
  theme(
    axis.text = element_text(color = "black", size = 12),
    axis.title = element_text(color = "black", size = 14),
    legend.title = element_blank(),
    legend.text = element_text(color = "black", size = 14),
    plot.margin = margin(2, 1, 2, 1, "cm")
  ) +
  # Draw the fill legend with a fillable shape, and the shape legend in black.
  guides(
    fill = guide_legend(override.aes = list(shape = 21)),
    shape = guide_legend(override.aes = list(fill = "black"))
  )
# head(df_wide_asv)
# Modify sample names for dendrogram plot.
# Transpose so that samples become columns, then tidy the sample labels.
df <- as.data.frame(t(df_wide_asv))
###
# The pattern/replacement pairs are applied one after another, in this order,
# to the column names. Order matters: later patterns see the output of earlier
# replacements.
colnames(df) <- Reduce(
  function(labels, rule) gsub(pattern = rule[[1]], replacement = rule[[2]], x = labels),
  list(
    c("_", " "),
    c("Vent Axial", "Axial"),
    c("Vent GordaRidge", "Gorda Ridge"),
    c("GordaRidge", "Gorda Ridge"),
    c("Plume ", ""),
    c("Vent VonDamm VonDamm", "Von Damm"),
    c("Vent Piccard Piccard", "Piccard"),
    # NOTE(review): cannot match, because every "GordaRidge" has already been
    # rewritten to "Gorda Ridge" two steps above. Confirm the intended label.
    c("Background GordaRidge", "Gorda Ridge"),
    c("VonDamm VonDamm", "Von Damm"),
    c("Piccard Piccard", "Piccard"),
    c(" BSW", ""),
    c("Background Axial Deep seawater 2015", "Background Axial 2015")
  ),
  names(df)
)
# Write over same data frame - fix sample names
dendro_input <- df
# head(dendro_input)
# Estimate Jaccard distance
# ?vegan::decostand
# ?vegdist
# Jaccard dissimilarity between samples. Samples are the columns of
# dendro_input, so the table is transposed to put them in rows.
dendro_jacc <- vegan::vegdist(t(dendro_input), method = "jaccard")
# head(dendro_jacc)
# Average-linkage clustering directly on the Jaccard dissimilarities.
# `dendro_jacc` is already a "dist" object; passing it through dist(t(...))
# again would cluster on Euclidean distances between rows of the Jaccard
# matrix, so the tree heights would no longer be Jaccard dissimilarities.
cluster_jacc <- hclust(dendro_jacc, method = "average")
library(ggdendro)
# Segment and label coordinates for drawing the tree with ggplot2.
dendro_plot_df <- ggdendro::dendro_data(as.dendrogram(cluster_jacc), type = "rectangle")
# Leaf labels in dendrogram order; reused to order the bar plots.
label_dendro_order <- as.character(dendro_plot_df$labels$label)
# label_dendro_order
# Plot dendrogram
# Horizontal dendrogram of the sample clustering with a label at each leaf.
dendro_plot_output <- ggplot(segment(dendro_plot_df)) +
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0.5), breaks = c(0, 0.2, 0.4, 0.6, 0.8)) +
  # angle and hjust are constants, so they are set outside aes().
  geom_text(aes(x = x, y = y, label = label), data = label(dendro_plot_df),
            angle = 0, hjust = 0) +
  theme_dendro() +
  labs(y = "Dissimilarity", title = "Jaccard distance") +
  # theme_dendro() is followed by explicit x-axis settings so the
  # dissimilarity scale is drawn.
  theme(axis.text.x = element_text(color = "black", size = 14),
        axis.line.x = element_line(color = "#252525"),
        axis.ticks.x = element_line(),
        axis.title.x = element_text(color = "black", size = 14))
# Add bar plot in the same order to show proportion of resident versus
# cosmopolitan ASVs in each sample.
# head(insitu_asv_wClass)
# unique(insitu_asv_wClass$SITE_CLASS)
# unique(insitu_asv_wClass$CLASS)
# Row counts and sequence sums per sample label and site-distribution class
# (SITE_CLASS, with Piccard and Von Damm pooled as "MCR" in shared classes).
dendro_bar <- insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, SITE_CLASS) %>%
  summarise(AVG = mean(value)) %>%
  ungroup() %>%
  # SAMPLENAME splits into more than four pieces; only the first four are
  # needed, so the surplus is dropped explicitly rather than triggering an
  # "Expected 4 pieces" warning for every row.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial"
  )) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
  mutate(SITE_CLASS_2 = case_when(
    SITE_CLASS == "Piccard & Von Damm" ~ "Piccard & Von Damm only",
    SITE_CLASS == "Piccard & Axial" ~ "MCR & Axial",
    SITE_CLASS == "Piccard & Gorda Ridge" ~ "MCR & Gorda Ridge",
    SITE_CLASS == "Von Damm & Axial" ~ "MCR & Axial",
    SITE_CLASS == "Von Damm & Gorda Ridge" ~ "MCR & Gorda Ridge",
    SITE_CLASS == "Piccard, Axial, & Gorda Ridge" ~ "MCR, Axial, & Gorda Ridge",
    SITE_CLASS == "Von Damm, Axial, & Gorda Ridge" ~ "MCR, Axial, & Gorda Ridge",
    TRUE ~ SITE_CLASS
  )) %>%
  group_by(SITE_CLASS_2, SAMPLE) %>%
  summarise(SEQ_SUM = sum(AVG),
            ASV_COUNT = n()) %>%
  # Apply the same ordered label replacements used for the dendrogram input
  # columns, so that SAMPLE matches the dendrogram leaf labels exactly.
  mutate(SAMPLE = Reduce(
    function(labels, rule) gsub(pattern = rule[[1]], replacement = rule[[2]], x = labels),
    list(
      c("_", " "),
      c("Vent Axial", "Axial"),
      c("Vent GordaRidge", "Gorda Ridge"),
      c("GordaRidge", "Gorda Ridge"),
      c("Plume ", ""),
      c("Vent VonDamm VonDamm", "Von Damm"),
      c("Vent Piccard Piccard", "Piccard"),
      # NOTE(review): cannot match; "GordaRidge" was rewritten two steps above.
      c("Background GordaRidge", "Gorda Ridge"),
      c("VonDamm VonDamm", "Von Damm"),
      c("Piccard Piccard", "Piccard"),
      c(" BSW", ""),
      c("Background Axial Deep seawater 2015", "Background Axial 2015")
    ),
    SAMPLE
  ))
# unique(insitu_asv_wClass$CLASS)
# CLASS values pooled into a single "Cosmopolitan" distribution category;
# every other CLASS value is kept as its own category.
cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
# Row counts and sequence sums per sample label and distribution category.
dendro_res_cos_df <- insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  mutate(DISTRIBUTION = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    TRUE ~ CLASS
  )) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, DISTRIBUTION) %>%
  summarise(AVG = mean(value)) %>%
  ungroup() %>%
  # SAMPLENAME splits into more than four pieces; only the first four are
  # needed, so the surplus is dropped explicitly rather than triggering an
  # "Expected 4 pieces" warning for every row.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial"
  )) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
  group_by(DISTRIBUTION, SAMPLE) %>%
  summarise(SEQ_SUM = sum(AVG),
            ASV_COUNT = n()) %>%
  # Apply the same ordered label replacements used for the dendrogram input
  # columns, so that SAMPLE matches the dendrogram leaf labels exactly.
  mutate(SAMPLE = Reduce(
    function(labels, rule) gsub(pattern = rule[[1]], replacement = rule[[2]], x = labels),
    list(
      c("_", " "),
      c("Vent Axial", "Axial"),
      c("Vent GordaRidge", "Gorda Ridge"),
      c("GordaRidge", "Gorda Ridge"),
      c("Plume ", ""),
      c("Vent VonDamm VonDamm", "Von Damm"),
      c("Vent Piccard Piccard", "Piccard"),
      # NOTE(review): cannot match; "GordaRidge" was rewritten two steps above.
      c("Background GordaRidge", "Gorda Ridge"),
      c("VonDamm VonDamm", "Von Damm"),
      c("Piccard Piccard", "Piccard"),
      c(" BSW", ""),
      c("Background Axial Deep seawater 2015", "Background Axial 2015")
    ),
    SAMPLE
  ))
# Order the bars of both summary tables by the dendrogram's leaf order.
dendro_bar[["SAMPLE_ORDER"]] <- factor(
  dendro_bar[["SAMPLE"]],
  levels = label_dendro_order
)
dendro_res_cos_df[["SAMPLE_ORDER"]] <- factor(
  dendro_res_cos_df[["SAMPLE"]],
  levels = label_dendro_order
)
# dendro_bar$SITE_CLASS_ORDER <- factor(dendro_bar$SITE_CLASS, levels = c("All sites","Von Damm only","Piccard only","Piccard & Von Damm","MCR & Axial","MCR & Gorda Ridge","Piccard & Axial","Piccard & Gorda Ridge","Piccard, Axial, & Gorda Ridge","Von Damm & Axial","Von Damm & Gorda Ridge","Von Damm, Axial, & Gorda Ridge","Gorda Ridge only","Axial only","Axial & Gorda Ridge"))
# Fixed legend/stacking order for the pooled site classes.
dendro_bar[["SITE_CLASS_ORDER"]] <- factor(
  dendro_bar[["SITE_CLASS_2"]],
  levels = c(
    "All sites", "Von Damm only", "Piccard only", "Piccard & Von Damm only",
    "MCR & Axial", "MCR & Gorda Ridge", "MCR, Axial, & Gorda Ridge",
    "Gorda Ridge only", "Axial only", "Axial & Gorda Ridge"
  )
)
# unique(dendro_bar$SITE_CLASS)
# Stacked, proportion-scaled bars of ASV counts per sample, filled by site
# class and ordered like the dendrogram leaves. The assignment starts on its
# own line so that it is not swallowed by the comment above.
dendro_bar_plot <- ggplot(dendro_bar, aes(x = SAMPLE_ORDER, y = ASV_COUNT, fill = SITE_CLASS_ORDER)) +
  # position = "fill" rescales every bar to sum to 1.
  geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0, 0)) +
  # scale_fill_manual(values = c("grey", "#e6550d", "#fdbb84", "#31a354", "#1c9099", "#fde0dd", "#c51b8a")) +
  # 15 colours are supplied for the 10 site-class levels.
  scale_fill_manual(values = c("#636363", "#bd0026", "#fed976", "#fd8d3c", "#a63603",
                               "#fdae6b", "#c994c7", "#00441b", "#ce1256", "#addd8e",
                               "#f7fcb9", "#016c59", "#41ab5d", "#6a51a3", "#3690c0")) +
  theme_bw() +
  theme(axis.text = element_text(color = "black", size = 5),
        legend.title = element_blank(),
        axis.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 1, "cm"),
        legend.position = "top") +
  labs(x = "", y = "Proportion of ASVs")
# Proportion of ASVs per sample in each distribution category, with bars
# ordered like the dendrogram leaves.
dendro_bar_plot_res_cos <- ggplot(
  dendro_res_cos_df,
  aes(x = SAMPLE_ORDER, y = ASV_COUNT, fill = DISTRIBUTION)
) +
  # geom_col() draws the supplied y values as bar heights;
  # position = "fill" rescales each bar to 1.
  geom_col(position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  labs(x = "", y = "Proportion of ASVs") +
  scale_y_continuous(expand = c(0, 0)) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "H") +
  theme_bw() +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(color = "black", size = 12),
    axis.text = element_text(color = "black", size = 5),
    axis.title = element_text(color = "black", size = 14),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )
# dendro_bar_plot_res_cos
# ?scale_fill_viridis
# Horizontal proportion bars per sample, weighted by sequence totals (SEQ_SUM)
# instead of ASV counts, and filled by the ordered site class.
dendro_bar_plot_SEQ <- ggplot(
  data = dendro_bar,
  mapping = aes(x = SAMPLE_ORDER, y = SEQ_SUM, fill = SITE_CLASS_ORDER)
) +
  geom_bar(position = "fill", stat = "identity", width = 0.7, alpha = 0.8, color = "black") +
  scale_y_continuous(expand = c(0, 0)) +
  # scale_fill_manual(values = c("grey", "#e6550d", "#fdbb84", "#31a354", "#1c9099", "#fde0dd", "#c51b8a")) +
  scale_fill_manual(values = c("#636363","#bd0026","#fed976","#fd8d3c","#a63603","#fdae6b","#c994c7","#00441b","#ce1256","#addd8e","#f7fcb9","#016c59","#41ab5d","#6a51a3","#3690c0")) +
  coord_flip() +
  labs(y = "Proportion of sequences", x = "") +
  theme_bw() +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 12, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 5, color = "black"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )
# Horizontal proportion bars per sample, weighted by sequence totals (SEQ_SUM)
# and filled by DISTRIBUTION category; each bar is rescaled to sum to 1.
dendro_bar_plot_res_cos_SEQ <- ggplot(dendro_res_cos_df, aes(x = SAMPLE_ORDER, y = SEQ_SUM, fill = DISTRIBUTION)) +
  geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0, 0)) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "H") +
  theme_bw() +
  theme(axis.text = element_text(color = "black", size = 5),
        legend.title = element_blank(),
        axis.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 1, "cm"),
        # Was the mis-encoded literal "t∂p", which is not a valid legend
        # position; the companion bar plots in this file all use "top".
        legend.position = "top") +
  labs(x = "", y = "Proportion of sequences")
# dendro_bar_plot
# Combine all plots with patchwork.
# library(patchwork)
# svg("dendrogram_wbarplots.svg", w = 18, h = 12)
# Dendrogram plus the two ASV-proportion bar plots, side by side in one row.
# plot_annotation() is namespaced like plot_layout() so neither call depends on
# patchwork being attached by the commented-out library() line.
dendro_plot_output + dendro_bar_plot + dendro_bar_plot_res_cos +
  patchwork::plot_layout(nrow = 1, widths = c(1, 0.2, 0.2),
                         heights = c(1.5, 0.2, 0.2)) +
  patchwork::plot_annotation(tag_levels = "a")
# dev.off()
# ?plot_layout
# ?plot_annotation
# Create the same plot, but by sequence proportion.
# svg("dendrogram_wbarplots.svg", w = 18, h = 12)
dendro_plot_output + dendro_bar_plot_SEQ + dendro_bar_plot_res_cos_SEQ +
  patchwork::plot_layout(nrow = 1, widths = c(1, 0.2, 0.2),
                         heights = c(1.5, 0.2, 0.2)) +
  patchwork::plot_annotation(tag_levels = "a")
# dev.off()
# make_asv_rich(insitu_asv_wClass, all)
# ASV richness with customized color schema
# ASV richness per sample, compared across sample types.
# Replicates are averaged per ASV and SAMPLENAME, SAMPLENAME is split into
# SITE / SAMPLETYPE / YEAR, and the number of ASV rows is counted per sample.
insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
  summarise(AVG = mean(value), .groups = "drop") %>%
  # SAMPLENAME can contain more than four pieces; extra = "drop" discards the
  # surplus exactly as the default does, without a warning on every run.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
  # One row per sample: number of ASVs observed in it
  group_by(SITE, REGION, SAMPLE, SAMPLETYPE) %>%
  summarise(NUM_ASV = n(), .groups = "drop") %>%
  ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
  geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
  geom_jitter(size = 2, width = 0.3, aes(fill = SITE)) +
  # Shapes 21/23/24 are fillable, so point fill can encode SITE
  scale_shape_manual(values = c(21, 23, 24)) +
  scale_fill_manual(values = c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")) +
  theme_bw() +
  theme(axis.text = element_text(color = "black", size = 12),
        legend.title = element_blank(),
        axis.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14)) +
  # Draw the fill legend with a fillable shape and the shape legend unfilled
  guides(fill = guide_legend(override.aes = list(shape = 21)),
         shape = guide_legend(override.aes = list(fill = NA))) +
  labs(x = "", y = "Total number of ASVs")
# ASV richness per sample and taxonomic group, one facet per Supergroup.
# Alveolata are resolved to "Alveolata-<Phylum>"; missing Supergroup/Phylum
# values are relabelled so they still appear in the plot.
insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
  summarise(AVG = mean(value), .groups = "drop") %>%
  # SAMPLENAME can contain more than four pieces; extra = "drop" discards the
  # surplus exactly as the default does, without a warning on every run.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
  # One row per sample and Supergroup: number of ASVs observed
  group_by(SITE, REGION, SAMPLE, SAMPLETYPE, Supergroup) %>%
  summarise(NUM_ASV = n(), .groups = "drop") %>%
  ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
  geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
  geom_jitter(size = 2, width = 0.3, aes(fill = Supergroup)) +
  facet_wrap(. ~ Supergroup, scales = "free_y") +
  scale_shape_manual(values = c(21, 23, 24)) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  theme_bw() +
  theme(axis.text = element_text(color = "black", size = 12),
        legend.title = element_blank(),
        axis.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14)) +
  guides(fill = guide_legend(override.aes = list(shape = 21)),
         shape = guide_legend(override.aes = list(fill = NA))) +
  labs(x = "", y = "Total number of ASVs")
# make_upset_plot(insitu_asv_wClass, all)
# head(insitu_asv_wClass)
# Repeat the above plot, but resolve by sample location and sample type.
# Alveolata phyla that are lumped together as "Other Alveolata" in the plot
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
# UpSet plot: for every ASV, the set of "SITE SAMPLETYPE" combinations it was
# found in; bars count ASVs per combination, filled by taxonomic group.
insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Keep rows with a missing Supergroup: a bare `Supergroup != "Opisthokonta"`
  # evaluates to NA for them and filter() drops NA, which made the
  # "Unknown Eukaryota" relabelling below unreachable.
  filter(is.na(Supergroup) | Supergroup != "Opisthokonta") %>%
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
  # Taxa to supergroup
  mutate(SupergroupPhylum = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>%
  summarise(AVG = mean(value), .groups = "drop") %>%
  # Only SITE and SAMPLETYPE are used below; extra = "drop" discards surplus
  # SAMPLENAME pieces silently (same result as the default, minus the warning).
  # The REGION / VENTNAME columns that used to be derived here were never used.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
  group_by(FeatureID, SupergroupPhylum, SAMPLE) %>%
  summarise(SUM = sum(AVG), .groups = "drop") %>%
  # filter(SUM > 200) %>%
  # Collapse to one row per ASV holding the list of samples it occurs in
  group_by(FeatureID, SupergroupPhylum) %>%
  summarise(SAMPLE = list(SAMPLE), .groups = "drop") %>%
  ggplot(aes(x = SAMPLE)) +
  geom_bar(color = "black", width = 0.5, aes(fill = SupergroupPhylum)) +
  scale_x_upset(n_intersections = 25) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color = "black", size = 14),
        axis.text.x = element_text(color = "black", size = 14),
        axis.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 5, "cm")) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525"))
# Output recorded in the original render (the count may change on re-run):
## Warning: Removed 1425 rows containing non-finite values (stat_count).
# dev.off()
# Observations regarding the above plot:
# - Axial and Gorda Ridge vent sites have more shared ASVs than any other pairwise comparison. After this, there were also many ASVs shared throughout MCR (vent, plume, + background). This may be a reflection of sample size, as MCR had more vent sites.
# - A small subset of ASVs was found at all vent sites or in all samples.
# - ASVs within the vents had a much higher number of unique ASVs (not shared with another habitat type) than any other sample type/location (furthest-left bars).
Repeat upsetR plot, but summarize at genus level, rather than “species” or “strain”
head(insitu_asv_wClass)## # A tibble: 6 × 34
## FeatureID SAMPLE value Taxon Domain Supergroup Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 00056209… Gorda… 8 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 2 00056209… Gorda… 13 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 3 00096455… Gorda… 91 Euka… Eukar… Rhizaria Radio… Acan… <NA> <NA> <NA>
## 4 000ee377… Axial… 282 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 5 000ee377… Axial… 32 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 6 00165708… Gorda… 1 Euka… Eukar… Stramenop… Ochro… Pela… Pela… Pelag… Pela…
## # … with 23 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, COORDINATES <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>,
## # SAMPLETYPE <chr>, YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>,
## # Mg <chr>, H2 <chr>, H2S <chr>, CH4 <chr>, ProkConc <chr>,
## # Sample_or_Control <chr>, DATASET <chr>, DECONTAM <chr>, CLASS <chr>,
## # SITE_CLASS <chr>
# Alveolata phyla that are lumped together as "Other Alveolata" in the plot
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
# UpSet plot at genus level: taxonomy from Domain through Genus is pasted into
# one GENUS key, and for each key the set of "SITE SAMPLETYPE" combinations it
# occurs in is plotted, filled by taxonomic group.
insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Keep rows with a missing Supergroup: a bare `Supergroup != "Opisthokonta"`
  # evaluates to NA for them and filter() drops NA, which made the
  # "Unknown Eukaryota" relabelling below unreachable.
  filter(is.na(Supergroup) | Supergroup != "Opisthokonta") %>%
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
  # Taxa to supergroup
  mutate(SupergroupPhylum = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  # Relies on the taxonomy columns being contiguous from Domain to Genus
  unite(GENUS, Domain:Genus, sep = ";") %>%
  # Average across replicates
  group_by(GENUS, SAMPLENAME, VENT, SupergroupPhylum) %>%
  summarise(AVG = mean(value), .groups = "drop") %>%
  # Only SITE and SAMPLETYPE are used below; extra = "drop" discards surplus
  # SAMPLENAME pieces silently (same result as the default, minus the warning).
  # The REGION / VENTNAME columns that used to be derived here were never used.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
  group_by(GENUS, SupergroupPhylum, SAMPLE) %>%
  summarise(SUM = sum(AVG), .groups = "drop") %>%
  # filter(SUM > 200) %>%
  # Collapse to one row per genus key holding the list of samples it occurs in
  group_by(GENUS, SupergroupPhylum) %>%
  summarise(SAMPLE = list(SAMPLE), .groups = "drop") %>%
  ggplot(aes(x = SAMPLE)) +
  geom_bar(color = "black", width = 0.5, aes(fill = SupergroupPhylum)) +
  scale_x_upset(n_intersections = 25) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "", y = "Shared at Genus level") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color = "black", size = 14),
        axis.text.x = element_text(color = "black", size = 14),
        axis.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 5, "cm")) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525"))
# Output recorded in the original render (the count may change on re-run):
## Warning: Removed 316 rows containing non-finite values (stat_count).
Isolate the list of genus-level groups that are shared at all sites.
# Alveolata phyla that are lumped together as "Other Alveolata"
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
# Wide table of genus-level keys (rows) by "SITE SAMPLETYPE" (columns) holding
# summed replicate-averaged counts. drop_na() keeps only the keys that have a
# value in every column, i.e. that are present in all site/sample-type groups.
shared_genus <- insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>%
  # Keep rows with a missing Supergroup: a bare `Supergroup != "Opisthokonta"`
  # evaluates to NA for them and filter() drops NA, which made the
  # "Unknown Eukaryota" relabelling below unreachable.
  filter(is.na(Supergroup) | Supergroup != "Opisthokonta") %>%
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
  # Taxa to supergroup
  mutate(SupergroupPhylum = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  # Relies on the taxonomy columns being contiguous from Domain to Genus
  unite(GENUS, Domain:Genus, sep = ";") %>%
  # Average across replicates
  group_by(GENUS, SAMPLENAME, VENT, SupergroupPhylum) %>%
  summarise(AVG = mean(value), .groups = "drop") %>%
  # Only SITE and SAMPLETYPE are used below; extra = "drop" discards surplus
  # SAMPLENAME pieces silently (same result as the default, minus the warning).
  # The REGION / VENTNAME columns that used to be derived here were never used.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
  select(GENUS, SAMPLE, AVG) %>%
  # Several vents/years fall into one SAMPLE column, so cells are summed
  pivot_wider(names_from = SAMPLE, values_from = "AVG", values_fn = sum) %>%
  drop_na()
# head(shared_genus)head(insitu_asv_wClass)## # A tibble: 6 × 34
## FeatureID SAMPLE value Taxon Domain Supergroup Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 00056209… Gorda… 8 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 2 00056209… Gorda… 13 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 3 00096455… Gorda… 91 Euka… Eukar… Rhizaria Radio… Acan… <NA> <NA> <NA>
## 4 000ee377… Axial… 282 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 5 000ee377… Axial… 32 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 6 00165708… Gorda… 1 Euka… Eukar… Stramenop… Ochro… Pela… Pela… Pelag… Pela…
## # … with 23 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, COORDINATES <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>,
## # SAMPLETYPE <chr>, YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>,
## # Mg <chr>, H2 <chr>, H2S <chr>, CH4 <chr>, ProkConc <chr>,
## # Sample_or_Control <chr>, DATASET <chr>, DECONTAM <chr>, CLASS <chr>,
## # SITE_CLASS <chr>
# Rows whose SITE_CLASS is "Axial & Gorda Ridge" and whose CLASS is
# "Vent only", tagged with an OCEAN label for later use.
gr_axial_shared <- insitu_asv_wClass %>%
  filter(
    SITE_CLASS == "Axial & Gorda Ridge",
    CLASS == "Vent only"
  ) %>%
  mutate(OCEAN = "Found throughout NE Pacific vents")
# Number of distinct ASVs in that subset
length(unique(gr_axial_shared$FeatureID)) ## [1] 330
Extract coordinates from Gorda Ridge and Axial Seamount, calculate distance between each vent site.
library(geosphere)
# Vent-type samples only, split by site, as input for the distance functions.
axial_vents <- insitu_asv_wClass %>%
  filter(SAMPLETYPE == "Vent", SITE == "Axial")
gr_vents <- insitu_asv_wClass %>%
  filter(SAMPLETYPE == "Vent", SITE == "GordaRidge")
# Compare geographic distance with community dissimilarity for every pair of
# vents in `df`.
#
# Args:
#   df: ASV table in long format. Columns read here: VENT, COORDINATES,
#       Domain, FeatureID, SAMPLENAME and value. COORDINATES is split on single
#       spaces into latitude, a hemisphere letter, longitude and a hemisphere
#       letter (presumably "<lat> N <long> W" -- confirm against the metadata).
#
# Returns:
#   A long tibble with one row per ordered vent pair (start != end) and
#   community metric: start, end, meters (Haversine distance), comm_dist
#   (metric column name) and value. As before, the result is the value of the
#   final assignment and is therefore returned invisibly.
#
# Uses distm()/distHaversine() (geosphere), vegdist() and compositions::clr/ilr.
est_distances_gr_axial <- function(df){
  # Reshape a square distance object into long start / end / <metric> rows.
  # all_of() makes the use of an external character vector explicit and errors
  # if an expected vent column is missing.
  as_long_dist <- function(d, ids, metric) {
    as.data.frame(as.matrix(d)) %>%
      rownames_to_column(var = "start") %>%
      pivot_longer(cols = all_of(ids), names_to = "end", values_to = metric)
  }

  # ---- Geographic distance -------------------------------------------------
  df_out <- df %>%
    select(VENT, COORDINATES) %>%
    separate(COORDINATES, into = c("lat", "N", "long", "W"), sep = " ") %>%
    mutate(
      # Round to 4 decimal places
      LONG_EW = as.numeric(formatC(as.numeric(long), digits = 4, format = "f")),
      LAT = as.numeric(formatC(as.numeric(lat), digits = 4, format = "f"))
    ) %>%
    # Western longitudes are negative; an empty hemisphere is treated as West
    mutate(LONG = case_when(
      W == "W" ~ (LONG_EW*-1),
      W == "E" ~ LONG_EW,
      W == "" ~ (LONG_EW*-1)
    )) %>%
    select(-lat, -N, -long, -W, -LONG_EW) %>%
    # Longitude first, then latitude, for distm()
    relocate(LONG) %>%
    distinct(VENT, .keep_all = TRUE) %>%
    column_to_rownames(var = "VENT")
  vents <- row.names(df_out)
  distance_m <- distm(df_out, fun = distHaversine)
  dimnames(distance_m) <- list(vents, vents)
  # Create matrix with distance in meters
  dist_m <- as_long_dist(distance_m, vents, "meters") %>%
    filter(!(start == end))

  # ---- Community matrix: vents (rows) x ASVs (columns) ------------------------
  # The SAMPLENAME split and REGION/VENTNAME labels that used to sit between
  # the two summaries were never used in the result; they only added a warning
  # and a hidden dependency on the global `mcr`, so they are gone.
  df_out_wide <- df %>%
    filter(Domain == "Eukaryota") %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>%
    summarise(AVG = mean(value), .groups = "drop") %>%
    group_by(FeatureID, VENT) %>%
    summarise(SUM = sum(AVG), .groups = "drop") %>%
    pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
    column_to_rownames(var = "VENT")

  # ---- Community distance metrics ----------------------------------------
  vents <- row.names(df_out_wide)
  dist_bray <- as_long_dist(
    vegdist(df_out_wide, method = "bray"), vents, "Bray_Curtis_metric")
  dist_jacc <- as_long_dist(
    vegdist(df_out_wide, method = "jaccard"), vents, "Jaccard_metric")
  # NOTE(review): despite the column name this is Euclidean distance on
  # CLR-transformed data, not Jaccard; the name is kept for downstream plots.
  dist_jacc_clr <- as_long_dist(
    vegdist(compositions::clr(df_out_wide), method = "euclidean"),
    vents, "Jaccard_CLR_metric")
  dist_ilr_euc <- as_long_dist(
    dist(compositions::ilr(df_out_wide), method = "euclidean"),
    vents, "Euclidean_ILR_metric")

  # Combine all distance outputs - use dist_m
  pair_keys <- c("start", "end")
  dist_compiled_output <- dist_m %>%
    left_join(dist_jacc_clr, by = pair_keys) %>%
    left_join(dist_bray, by = pair_keys) %>%
    left_join(dist_jacc, by = pair_keys) %>%
    left_join(dist_ilr_euc, by = pair_keys) %>%
    pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>%
    filter(!(start == end))
}
# Estimate distance matrix among Gorda Ridge and Axial Seamount samples - compare with geographic distance. For simplifying this, Axial seamount samples have been combined.
# Pairwise geographic and community distances among vents, computed separately
# for the Axial and the Gorda Ridge vent samples. The warning text after each
# call is output captured when this document was rendered.
axial_dist <- est_distances_gr_axial(axial_vents)## Warning: Expected 4 pieces. Additional pieces discarded in 6824 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
gr_dist <- est_distances_gr_axial(gr_vents)## Warning: Expected 4 pieces. Additional pieces discarded in 6615 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Compile distances in meters and as distance estimate.
# Community distance against geographic distance, one facet per metric, with a
# linear fit; Axial and Gorda Ridge panels sit side by side via patchwork.
# Only the Gorda Ridge panel uses a log10 x axis.
ggplot(axial_dist, aes(x = meters, y = value)) +
  geom_jitter(stat = "identity") +
  facet_grid(comm_dist ~ ., scales = "free") +
  geom_smooth(method = "lm") +
  labs(title = "Axial Seamount", x = "Distance (m)", y = "Metric") +
  theme_linedraw() +
  ggplot(gr_dist, aes(x = meters, y = value)) +
  geom_jitter(stat = "identity") +
  facet_grid(comm_dist ~ ., scales = "free") +
  geom_smooth(method = "lm") +
  labs(title = "Gorda Ridge", x = "Distance (m)", y = "Metric") +
  scale_x_log10() + theme_linedraw() +
  patchwork::plot_layout(ncol = 2)

# The statements below were fused onto the ends of the previous lines, which
# does not parse; each now sits on its own line.
# Rows of the shared Axial/Gorda Ridge subset with more than 25 sequences
tmp2 <- gr_axial_shared %>%
  filter(value > 25)
hist(tmp2$value)
length(unique(tmp2$FeatureID)) ## [1] 237
more_than_25 <- as.character(unique(tmp2$FeatureID))
# head(gr_axial_shared)
# Presence tile plot for the Axial/Gorda Ridge shared ASVs listed in
# `more_than_25`: one tile per ASV and vent, faceted by Phylum (rows) and
# SITE + YEAR (columns). Rows without a Phylum and Metazoa are excluded.
gr_axial_shared %>%
  filter(FeatureID %in% more_than_25) %>%
  filter(!is.na(Phylum)) %>%
  filter(!(Phylum == "Metazoa")) %>%
  # extra = "drop" discards surplus SAMPLENAME pieces exactly as the default
  # does, without emitting a warning on every run.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    # REGION == "Axial" ~ VENT
  )) %>%
  select(-Sample_tmp) %>%
  group_by(SITE, YEAR, VENTNAME, FeatureID, Taxon, Domain, Supergroup, Phylum, Class) %>%
  summarise(SUM = sum(value), .groups = "drop") %>%
  mutate(BIN = ifelse(SUM > 0, 1, NA)) %>%
  # pivot_wider(names_from = SAMPLENAME, values_from = BIN) %>%
  ggplot(aes(x = VENTNAME, y = FeatureID, fill = BIN)) +
  # The constant fill overrides the BIN mapping: every drawn tile is grey
  geom_tile(fill = "grey", color = "black") +
  theme_minimal() +
  facet_grid(Phylum ~ SITE + YEAR, space = "free", scales = "free") +
  theme(axis.text.y = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5))
# ?ifelse()
# Characterize putative endemic ASVs within MCR only.
unique(insitu_asv_wClass$SITE_CLASS)## [1] "Gorda Ridge only" "Axial only"
## [3] "Von Damm only" "Piccard & Von Damm"
## [5] "Piccard only" "MCR & Axial"
## [7] "Axial & Gorda Ridge" "Von Damm & Gorda Ridge"
## [9] "Von Damm, Axial, & Gorda Ridge" "Von Damm & Axial"
## [11] "All sites" "MCR & Gorda Ridge"
## [13] "Piccard, Axial, & Gorda Ridge" "Piccard & Gorda Ridge"
## [15] "Piccard & Axial"
# Rows whose SITE_CLASS is "Piccard & Von Damm" and whose CLASS is
# "Vent only", tagged with an OCEAN label for later use.
vd_picc_shared <- insitu_asv_wClass %>%
  filter(
    SITE_CLASS == "Piccard & Von Damm",
    CLASS == "Vent only"
  ) %>%
  mutate(OCEAN = "Found throughout MCR vents")
# Number of distinct ASVs in that subset
length(unique(vd_picc_shared$FeatureID))## [1] 235
table(vd_picc_shared$Supergroup)##
## Alveolata Amoebozoa Archaea_X Archaeplastida Hacrobia
## 171 2 5 14 243
## Opisthokonta Rhizaria Stramenopiles
## 44 41 296
table(gr_axial_shared$Supergroup)##
## Alveolata Amoebozoa Apusozoa Archaeplastida Excavata
## 319 27 34 4 19
## Hacrobia Opisthokonta Rhizaria Stramenopiles
## 67 327 223 261
Isolate samples from Von Damm and Piccard vent sites only.
# Vent-type samples from both Mid-Cayman Rise sites. The ShrimpHole vent gets
# a hard-coded "lat, long" string in place of its COORDINATES value; the
# patched column is re-added under the original name as the last column.
vd_picc_vents <- insitu_asv_wClass %>%
  filter(SAMPLETYPE == "Vent", SITE %in% c("VonDamm", "Piccard")) %>%
  mutate(coord = case_when(
    VENT == "ShrimpHole" ~ "18.374893, -81.797441",
    TRUE ~ COORDINATES
  )) %>%
  select(-COORDINATES) %>%
  rename(COORDINATES = coord)
# View(vd_picc_vents)
# Piccard vent samples (coordinates used as-is)
picc_vents <- insitu_asv_wClass %>%
  filter(SAMPLETYPE == "Vent", SITE == "Piccard")
# Von Damm vent samples, with the same ShrimpHole coordinate override
vd_vents <- insitu_asv_wClass %>%
  filter(SAMPLETYPE == "Vent", SITE == "VonDamm") %>%
  mutate(coord = case_when(
    VENT == "ShrimpHole" ~ "18.374893, -81.797441",
    TRUE ~ COORDINATES
  )) %>%
  select(-COORDINATES) %>%
  rename(COORDINATES = coord)
# Compare geographic distance with community dissimilarity for every pair of
# vents in `df` (Mid-Cayman Rise coordinate format).
#
# Args:
#   df: ASV table in long format. Columns read here: VENT, COORDINATES,
#       Domain, FeatureID, SAMPLENAME and value. COORDINATES is split on ", "
#       into latitude and longitude and converted to numbers.
#
# Returns:
#   A long tibble with one row per ordered vent pair (start != end) and
#   community metric: start, end, meters (Haversine distance), comm_dist
#   (metric column name) and value. As before, the result is the value of the
#   final assignment and is therefore returned invisibly.
#
# Uses distm()/distHaversine() (geosphere), vegdist() and compositions::clr/ilr.
est_distances_mcr <- function(df){
  # Reshape a square distance object into long start / end / <metric> rows.
  # all_of() makes the use of an external character vector explicit and errors
  # if an expected vent column is missing.
  as_long_dist <- function(d, ids, metric) {
    as.data.frame(as.matrix(d)) %>%
      rownames_to_column(var = "start") %>%
      pivot_longer(cols = all_of(ids), names_to = "end", values_to = metric)
  }

  # ---- Geographic distance -------------------------------------------------
  df_out <- df %>%
    select(VENT, COORDINATES) %>%
    separate(COORDINATES, into = c("lat", "long"), sep = ", ", convert = TRUE) %>%
    distinct(VENT, .keep_all = TRUE) %>%
    # Longitude first, then latitude, for distm()
    relocate(LONG = long, LAT = lat) %>%
    column_to_rownames(var = "VENT")
  vents <- row.names(df_out)
  distance_m <- distm(df_out, fun = distHaversine)
  dimnames(distance_m) <- list(vents, vents)
  # Create matrix with distance in meters
  dist_m <- as_long_dist(distance_m, vents, "meters") %>%
    filter(!(start == end))

  # ---- Community matrix: vents (rows) x ASVs (columns) ------------------------
  # The SAMPLENAME split and REGION/VENTNAME labels that used to sit between
  # the two summaries were never used in the result; they only added a warning
  # and a hidden dependency on the global `mcr`, so they are gone.
  df_out_wide <- df %>%
    filter(Domain == "Eukaryota") %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>%
    summarise(AVG = mean(value), .groups = "drop") %>%
    group_by(FeatureID, VENT) %>%
    summarise(SUM = sum(AVG), .groups = "drop") %>%
    pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
    column_to_rownames(var = "VENT")

  # ---- Community distance metrics ----------------------------------------
  vents <- row.names(df_out_wide)
  dist_bray <- as_long_dist(
    vegdist(df_out_wide, method = "bray"), vents, "Bray_Curtis_metric")
  dist_jacc <- as_long_dist(
    vegdist(df_out_wide, method = "jaccard"), vents, "Jaccard_metric")
  # NOTE(review): despite the column name this is Euclidean distance on
  # CLR-transformed data, not Jaccard; the name is kept for downstream plots.
  dist_jacc_clr <- as_long_dist(
    vegdist(compositions::clr(df_out_wide), method = "euclidean"),
    vents, "Jaccard_CLR_metric")
  dist_ilr_euc <- as_long_dist(
    dist(compositions::ilr(df_out_wide), method = "euclidean"),
    vents, "Euclidean_ILR_metric")

  # Combine all distance outputs - use dist_m
  pair_keys <- c("start", "end")
  dist_compiled_output <- dist_m %>%
    left_join(dist_jacc_clr, by = pair_keys) %>%
    left_join(dist_bray, by = pair_keys) %>%
    left_join(dist_jacc, by = pair_keys) %>%
    left_join(dist_ilr_euc, by = pair_keys) %>%
    pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>%
    filter(!(start == end))
}
# Calculate distance matrix for MCR samples
# Pairwise geographic and community distances among vents for Von Damm only,
# Piccard only, and both sites combined. The warning text after each call is
# output captured when this document was rendered.
vd_vents_dist <- est_distances_mcr(vd_vents)## Warning: Expected 4 pieces. Additional pieces discarded in 4232 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
picc_vents_dist <- est_distances_mcr(picc_vents)## Warning: Expected 4 pieces. Additional pieces discarded in 1280 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
vd_picc_dist <- est_distances_mcr(vd_picc_vents)## Warning: Expected 4 pieces. Additional pieces discarded in 5512 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# View(vd_vents_dist)
# View(vd_vents)
# Something is off about the Shrimp Hole lat/long?
# head(vd_vents_dist)
# Community distance against geographic distance with a linear fit, one facet
# row per metric; Von Damm and Piccard panels side by side via patchwork.
ggplot(data = vd_vents_dist, mapping = aes(x = meters, y = value)) +
  geom_jitter(stat = "identity") +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(comm_dist), scales = "free") +
  theme_linedraw() +
  labs(x = "Distance (m)", y = "Metric", title = "Von Damm") +
  ggplot(data = picc_vents_dist, mapping = aes(x = meters, y = value)) +
  geom_jitter(stat = "identity") +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(comm_dist), scales = "free") +
  theme_linedraw() +
  labs(x = "Distance (m)", y = "Metric", title = "Piccard") +
  patchwork::plot_layout(ncol = 2)
# ggplot(vd_picc_dist, aes(x = meters, y = value)) +
# geom_jitter(stat = "identity") +
# facet_grid(comm_dist ~ ., scales = "free")
# View(vd_picc_dist)head(vd_picc_shared)## # A tibble: 6 × 35
## FeatureID SAMPLE value Taxon Domain Supergroup Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 006f5664… 66_MC… 281 Euka… Eukar… Stramenop… Ochro… Chry… Chry… Chrys… Chry…
## 2 006f5664… 70_MC… 74 Euka… Eukar… Stramenop… Ochro… Chry… Chry… Chrys… Chry…
## 3 006f5664… 71_MC… 117 Euka… Eukar… Stramenop… Ochro… Chry… Chry… Chrys… Chry…
## 4 006f5664… 77_MC… 87 Euka… Eukar… Stramenop… Ochro… Chry… Chry… Chrys… Chry…
## 5 006f5664… 78_MC… 52 Euka… Eukar… Stramenop… Ochro… Chry… Chry… Chrys… Chry…
## 6 006f5664… 80_MC… 74 Euka… Eukar… Stramenop… Ochro… Chry… Chry… Chrys… Chry…
## # … with 24 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, COORDINATES <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>,
## # SAMPLETYPE <chr>, YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>,
## # Mg <chr>, H2 <chr>, H2S <chr>, CH4 <chr>, ProkConc <chr>,
## # Sample_or_Control <chr>, DATASET <chr>, DECONTAM <chr>, CLASS <chr>,
## # SITE_CLASS <chr>, OCEAN <chr>
length(unique(vd_picc_shared$FeatureID)) ## [1] 235
# Rows of the shared Piccard/Von Damm subset with more than 25 sequences
tmp3 <- vd_picc_shared %>%
  filter(value > 25)
length(unique(tmp3$FeatureID)) ## [1] 206
# NOTE: this overwrites the `more_than_25` vector built earlier for the
# Axial/Gorda Ridge subset.
more_than_25 <- as.character(unique(tmp3$FeatureID))

# Presence tile plot for the MCR shared ASVs listed in `more_than_25`: one tile
# per ASV and vent, faceted by Phylum (rows) and SITE + YEAR (columns). The
# pipeline used to be fused onto the assignment above, which does not parse.
vd_picc_shared %>%
  filter(FeatureID %in% more_than_25) %>%
  filter(!is.na(Phylum)) %>%
  filter(!(Phylum == "Metazoa")) %>%
  # extra = "drop" discards surplus SAMPLENAME pieces exactly as the default
  # does, without emitting a warning on every run.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    # REGION == "Axial" ~ VENT
  )) %>%
  select(-Sample_tmp) %>%
  group_by(SITE, YEAR, VENTNAME, FeatureID, Taxon, Domain, Supergroup, Phylum, Class) %>%
  summarise(SUM = sum(value), .groups = "drop") %>%
  mutate(BIN = ifelse(SUM > 0, 1, NA)) %>%
  # pivot_wider(names_from = SAMPLENAME, values_from = BIN) %>%
  ggplot(aes(x = VENTNAME, y = FeatureID, fill = BIN)) +
  # The constant fill overrides the BIN mapping: every drawn tile is grey
  geom_tile(fill = "grey", color = "black") +
  theme_minimal() +
  facet_grid(Phylum ~ SITE + YEAR, space = "free", scales = "free") +
  theme(axis.text.y = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5))
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# Call for eukaryotes across all sites, by CLASS category, plotted by Phylum
top15(insitu_asv_wClass, 15, all, Domain, "Eukaryota", CLASS, all_class, Phylum)
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# Same call restricted to ASVs whose SITE_CLASS is "All sites"
top15(insitu_asv_wClass, 15, all, Domain, "Eukaryota", SITE_CLASS, "All sites", Phylum)
# Out of all the ASVs and sequences, what percentage of ASVs were found at all 4 of my vent sites? What % of ASVs were unique to individual sites? What % of sequences?
# Totals for the full in situ table; every percentage in this block uses
# totalasvs (distinct FeatureIDs) or totalseq (sum of `value`) as denominator.
totalasvs <- length(unique(insitu_asv_wClass$FeatureID)); totalasvs## [1] 12375
totalseq <- sum(insitu_asv_wClass$value); totalseq## [1] 3788734
unique(insitu_asv_wClass$SITE_CLASS)## [1] "Gorda Ridge only" "Axial only"
## [3] "Von Damm only" "Piccard & Von Damm"
## [5] "Piccard only" "MCR & Axial"
## [7] "Axial & Gorda Ridge" "Von Damm & Gorda Ridge"
## [9] "Von Damm, Axial, & Gorda Ridge" "Von Damm & Axial"
## [11] "All sites" "MCR & Gorda Ridge"
## [13] "Piccard, Axial, & Gorda Ridge" "Piccard & Gorda Ridge"
## [15] "Piccard & Axial"
# Split rows on the SITE_CLASS label: labels containing " only" are treated as
# single-site, every other label as shared. tmp_allshared ("All sites") is a
# subset of tmp_shared.
tmp_unique <- filter(insitu_asv_wClass, grepl(" only", SITE_CLASS))
tmp_shared <- filter(insitu_asv_wClass, !(grepl(" only", SITE_CLASS)))
tmp_allshared <- filter(insitu_asv_wClass, SITE_CLASS == "All sites")
# Total number of ASVs that were found only at an individual vent site
# (a = ASV count, a_sum = sequence count; each followed by its % of the total)
a <- length(unique(tmp_unique$FeatureID)); a## [1] 10191
100*(a/totalasvs)## [1] 82.35152
a_sum <- sum(tmp_unique$value); a_sum## [1] 1187229
100*(a_sum/totalseq)## [1] 31.33577
# Total number of ASVs that were found at more than 1 site
b <- length(unique(tmp_shared$FeatureID)); b## [1] 2184
100*(b/totalasvs)## [1] 17.64848
b_sum <- sum(tmp_shared$value); b_sum## [1] 2601505
100*(b_sum/totalseq)## [1] 68.66423
# Total number of ASVs designated to be found at all sites
# NOTE(review): `c` is also the name of base R's combine function; calls such
# as c("a", "b") still resolve to the function, but the name is easy to misread.
c <- length(unique(tmp_allshared$FeatureID)); c## [1] 194
100*(c/totalasvs)## [1] 1.567677
c_sum <- sum(tmp_allshared$value); c_sum## [1] 834734
100*(c_sum/totalseq)## [1] 22.032
From the vent-only ASVs, what percentage of them appear only at 1 site? what percent also appear at other vent sites?
# Rows whose ASV is classified as occurring in vent samples only.
resident_only <- filter(insitu_asv_wClass, CLASS == "Vent only")
# totalasvs / totalseq are re-assigned here: from this point on they describe
# the vent-only subset, not the full dataset.
totalasvs <- length(unique(resident_only$FeatureID)); totalasvs
## [1] 8107
totalseq <- sum(resident_only$value); totalseq
## [1] 1267675
# resident includes
# Vent-only ASVs as a percentage of all ASVs in the in situ table.
100 * (totalasvs / length(unique(insitu_asv_wClass$FeatureID)))
## [1] 65.51111
# Vent-only sequences as a percentage of all sequences. The denominator is
# the total sequence count; the previous version divided by the number of
# ASVs, which produced 10243.84 "percent".
100 * (totalseq / sum(insitu_asv_wClass$value))
## [1] 33.45907
# unique(resident_only$SITE_CLASS)
tmp_unique <- filter(resident_only, grepl(" only", SITE_CLASS))
tmp_shared <- filter(resident_only, !(grepl(" only", SITE_CLASS)))
tmp_allshared <- filter(resident_only, SITE_CLASS == "All sites")
# Of the ASVs found ONLY within diffuse venting fluid, just over 90% were unique to an individual vent site; these make up about 54% of the vent-only sequences.
a <- length(unique(tmp_unique$FeatureID)); a## [1] 7325
100*(a/totalasvs)## [1] 90.35402
a_sum <- sum(tmp_unique$value); a_sum## [1] 682645
100*(a_sum/totalseq)## [1] 53.85016
# Of the ASVs from vent only, 9.6% were also found at other vent sites, totals to 46% of the sequences.
b <- length(unique(tmp_shared$FeatureID)); b## [1] 782
100*(b/totalasvs)## [1] 9.645985
b_sum <- sum(tmp_shared$value); b_sum ## [1] 585030
100*(b_sum/totalseq)## [1] 46.14984
# Total number of ASVs designated to be found at all sites
c <- length(unique(tmp_allshared$FeatureID)); c## [1] 7
100*(c/totalasvs)## [1] 0.08634513
c_sum <- sum(tmp_allshared$value); c_sum## [1] 3022
100*(c_sum/totalseq)## [1] 0.2383892
vent_only_allsites <- tmp_allshared
# View(vent_only_allsites)
# unique(vent_only_allsites$Taxon)
# unique(vent_only_allsites$FeatureID)# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(resident_only, 10, all, Domain, "Eukaryota", CLASS, all_class, Phylum)
Isolate cosmopolitan ASVs: those found in more than one habitat type (any combination of vent, plume, & background).
# unique(insitu_asv_wClass$CLASS)
cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
# cosmo <- c("Vent, plume, & background")
cosmo_only <- filter(insitu_asv_wClass, CLASS %in% cosmo)
totalasvs <- length(unique(cosmo_only$FeatureID)); totalasvs## [1] 2133
totalseq <- sum(cosmo_only$value); totalseq## [1] 2424982
100*(totalasvs/length(unique(insitu_asv_wClass$FeatureID)))## [1] 17.23636
100*(totalseq/sum(insitu_asv_wClass$value))## [1] 64.00507
# unique(cosmo_only$SITE_CLASS)
tmp_unique <- filter(cosmo_only, grepl(" only", SITE_CLASS))
tmp_shared <- filter(cosmo_only, !(grepl(" only", SITE_CLASS)))
tmp_allshared <- filter(cosmo_only, SITE_CLASS == "All sites")
# Of the ASVs found in more than one habitat type (any combination of vent, plume, and background), 35.7% were unique to a single site; these make up 17.1% of the cosmopolitan sequences.
a <- length(unique(tmp_unique$FeatureID)); a## [1] 761
100*(a/totalasvs)## [1] 35.67745
a_sum <- sum(tmp_unique$value); a_sum## [1] 414955
100*(a_sum/totalseq)## [1] 17.11167
# Of the ASVs found in more than one habitat type, 64.3% were also found at another vent site; these make up 82.9% of the cosmopolitan sequences.
b <- length(unique(tmp_shared$FeatureID)); b## [1] 1372
100*(b/totalasvs)## [1] 64.32255
b_sum <- sum(tmp_shared$value); b_sum ## [1] 2010027
100*(b_sum/totalseq)## [1] 82.88833
# Of the ASVs found in more than one habitat type, 8.8% were found at all vent sites; these make up 34.3% of the cosmopolitan sequences.
c <- length(unique(tmp_allshared$FeatureID)); c## [1] 187
100*(c/totalasvs)## [1] 8.766995
c_sum <- sum(tmp_allshared$value); c_sum## [1] 831712
100*(c_sum/totalseq)## [1] 34.29766
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(cosmo_only, 10, all, Domain, "Eukaryota", CLASS, all_class, Phylum)# cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo <- c("Vent, plume, & background")
# cosmo_only <- filter(insitu_asv_wClass, CLASS %in% cosmo)
res <- c("Vent only")

# Build a wide sample-by-ASV abundance table for selected distribution classes.
#
# df    : long ASV table; the columns read here are Domain, CLASS, FeatureID,
#         SAMPLENAME, VENT and value.
# slice : character vector of CLASS labels to keep.
#
# Returns (invisibly, as before) a data frame with one row per sample, one
# column per FeatureID and zero-filled abundances. Row names have the form
# "SITE-VENT-YEAR-SAMPLETYPE--VENTNAME". Despite the function name, no
# distance is calculated here; the result is the input to the distance step.
# Relies on the global vector `mcr` to recognise Mid-Cayman Rise sites.
isolate_calc_jaccard <- function(df, slice){
  df_out_wide <- df %>%
    filter(Domain == "Eukaryota") %>%
    filter(CLASS %in% slice) %>%
    # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>%
    summarise(AVG = mean(value)) %>%
    ungroup() %>%
    # Pieces beyond the fourth are discarded, as before; extra = "drop" only
    # silences the "Expected 4 pieces" warning.
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
             remove = FALSE, extra = "drop") %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial")) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ paste(SITE, VENT, sep = " "),
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ paste(SITE, VENT, YEAR, sep = " ")
      # REGION == "Axial" ~ VENT
    )) %>%
    select(-Sample_tmp) %>%
    unite(SAMPLEID, SITE, VENT, YEAR, SAMPLETYPE, sep = "-", remove = FALSE) %>%
    unite(sample_full, SAMPLEID, VENTNAME, sep = "--") %>%
    group_by(FeatureID, sample_full) %>%
    summarise(SUM = sum(AVG)) %>%
    # Drop the leftover FeatureID grouping before FeatureID becomes the
    # column names.
    ungroup() %>%
    pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
    column_to_rownames(var = "sample_full")
  invisible(df_out_wide)
}

resident <- isolate_calc_jaccard(insitu_asv_wClass, res)
cosmopolitan <- isolate_calc_jaccard(insitu_asv_wClass, cosmo)
# dist_jacc <- as.data.frame(as.matrix(vegdist(df_out_wide, method = "jaccard"))) %>%
# rownames_to_column(var = "start") %>%
# pivot_longer(cols = vents, names_to = "end") %>%
# select(start, end, Jaccard_metric = value)
# Assign row names and calc distance metric
# vents <- row.names(df_out_wide)
# dist_bray <- as.data.frame(as.matrix(vegdist(df_out_wide, method = "bray"))) %>%
# rownames_to_column(var = "start") %>%
# pivot_longer(cols = vents, names_to = "end") %>%
# select(start, end, Bray_Curtis_metric = value) %>%
# distinct()
# Long table of unique pairwise sample distances for one wide abundance table.
#
# wide         : data frame with samples as row names and ASVs as columns.
# subset_label : value written to the SUBSET column.
# into         : names for the two sample columns, in the order the
#                alphabetically sorted pair is split (c("X", "Y") or
#                c("Y", "X")).
#
# The distance is the Euclidean distance between CLR-transformed rows, not a
# Jaccard index; the column name Jaccard_CLR_metric is kept because the code
# that follows refers to it.
# NOTE(review): the wide tables are zero-filled; confirm how
# compositions::clr() treats zeros before interpreting these distances.
# NOTE(review): pairs are keyed by joining the two names with ", ", so a
# sample name containing ", " would be split incorrectly.
.clr_pairwise_long <- function(wide, subset_label, into) {
  as.data.frame(as.matrix(vegdist(compositions::clr(wide), method = "euclidean"))) %>%
    rownames_to_column(var = "start") %>%
    # all_of(): select the columns named by an external character vector
    pivot_longer(cols = all_of(row.names(wide)), names_to = "end") %>%
    select(start, end, Jaccard_CLR_metric = value) %>%
    # Sort each pair so that (a, b) and (b, a) share a key, then keep one
    mutate(VARS = purrr::map2_chr(start, end, ~toString(sort(c(.x, .y))))) %>%
    distinct(VARS, .keep_all = TRUE) %>%
    separate(VARS, into, sep = ", ") %>%
    select(-start, -end) %>%
    add_column(SUBSET = subset_label)
}

dist_jacc_clr_res <- .clr_pairwise_long(resident, "RESIDENT", c("X", "Y"))
# dist_jacc_clr_res <- as.data.frame(as.matrix(vegdist(resident, method = "bray"))) %>%
# range(dist_jacc_clr_res$Jaccard_CLR_metric)
# ggplot(dist_jacc_clr_res, aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
# geom_tile() +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Flip the Y and X variables for cosmopolitan - will appear at bottom
dist_jacc_clr_cos <- .clr_pairwise_long(cosmopolitan, "COSMOPOLITAN", c("Y", "X"))
# dist_jacc_clr_cos <- as.data.frame(as.matrix(vegdist(cosmopolitan, method = "bray"))) %>%
###
# Stack the cosmopolitan and resident distance tables and split each sample
# key ("SITE-VENT-YEAR-SAMPLETYPE--VENTNAME") into its parts.
# The key has four "-"-separated fields but only three are kept; the fourth
# (SAMPLETYPE) is dropped on purpose, so extra = "drop" replaces the former
# "Expected 3 pieces" warnings.
# NOTE(review): a VENT label that itself contains "-" would shift the
# VENT/YEAR fields; verify against the vent names in the metadata.
joined_dist <- dist_jacc_clr_cos %>%
  rbind(dist_jacc_clr_res) %>%
  separate(Y, c("Y_FULL", "Y"), sep = "--") %>%
  separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", extra = "drop") %>%
  separate(X, c("X_FULL", "X"), sep = "--") %>%
  separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", extra = "drop")
# head(joined_dist)

# Heatmap of the pairwise distances in joined_dist. The resident and
# cosmopolitan tables were built with X and Y swapped, so the two subsets are
# drawn on opposite sides of the diagonal.
ggplot(joined_dist, aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
  geom_tile(color = "#474440") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.title = element_blank()) +
  scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
  coord_fixed(ratio = 1)
# facet_grid(Y_SITE ~ X_SITE, scale = "free", space = "free")

# Look at lowest Jaccard metric among samples that are from separate sites.
head(joined_dist)## # A tibble: 6 × 10
## Jaccard_CLR_metr… Y_SITE Y_VENT Y_YEAR Y X_SITE X_VENT X_YEAR X SUBSET
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 0 Gorda… Cande… 2019 Gord… Gorda… Cande… 2019 Gord… COSMO…
## 2 22.5 Gorda… Cande… 2019 Gord… Gorda… Deep … 2019 Gord… COSMO…
## 3 23.6 Gorda… Cande… 2019 Gord… Gorda… Mt Ed… 2019 Gord… COSMO…
## 4 25.9 Gorda… Cande… 2019 Gord… Gorda… Near … 2019 Gord… COSMO…
## 5 22.1 Gorda… Cande… 2019 Gord… Gorda… Shall… 2019 Gord… COSMO…
## 6 29.4 Gorda… Cande… 2019 Gord… Gorda… Sir V… 2019 Gord… COSMO…
# Across site similarities
# View(joined_dist %>%
# # Isolate pairwise comparisons from with each site
# filter(X_SITE != Y_SITE) %>%
# # Isolate resident
# filter(SUBSET == "RESIDENT") %>%
# arrange(Jaccard_CLR_metric))# Not sure if I need this
# One row per ASV: taxonomy plus its habitat (CLASS) and site (SITE_CLASS)
# distribution labels.
tax_key <- insitu_asv_wClass %>%
  select(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species, CLASS, SITE_CLASS) %>%
  distinct()
# head(insitu_asv_wClass)

# Alveolate groups that are pooled into "Other Alveolata" below.
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")

# Per-ASV prevalence (number of SAMPLE columns with a non-zero value) and
# total sequences, with taxonomy re-attached and a simplified SUPERGROUP label.
# Relies on the global vector `mcr` to recognise Mid-Cayman Rise sites.
tmp <- insitu_asv_wClass %>%
  filter(Domain == "Eukaryota") %>%
  filter(!is.na(Supergroup)) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT) %>%
  summarise(AVG = mean(value)) %>%
  ungroup() %>%
  # Pieces beyond the fourth are discarded, as before; extra = "drop" only
  # silences the "Expected 4 pieces" warning.
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
           remove = TRUE, extra = "drop") %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
  group_by(FeatureID, SAMPLE) %>%
  summarise(SUM = sum(AVG)) %>%
  ungroup() %>%
  pivot_wider(names_from = "SAMPLE", values_from = SUM, values_fill = 0) %>%
  column_to_rownames(var = "FeatureID") %>%
  # `.` is the piped-in table (sample columns only), so SEQ_TOTAL does not
  # include the PREVALENCE column created on the line above it.
  mutate(PREVALENCE = rowSums(. > 0),
         SEQ_TOTAL = rowSums(.)) %>%
  rownames_to_column(var = "FeatureID") %>%
  left_join(tax_key, by = "FeatureID") %>%
  # Rows with a missing Supergroup were removed at the top of the pipeline
  # (and this comparison would drop them as well), so no NA recoding of
  # Supergroup is needed afterwards.
  filter(Supergroup != "Opisthokonta") %>%
  mutate(Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  ))
# head(tmp)
# Prevalence vs. total sequences for every ASV, one panel per SUPERGROUP.
# The y axis is log10-scaled, so ASVs with no sequences are removed first.
tmp %>%
  filter(SEQ_TOTAL > 0) %>%
  ggplot(aes(x = PREVALENCE, y = SEQ_TOTAL, fill = SUPERGROUP)) +
  geom_jitter(stat = "identity", shape = 21) +
  scale_y_log10() +
  facet_wrap(SUPERGROUP ~ .) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  theme_linedraw() +
  labs(x = "Number of samples ASV appears in", y = "Total sequences (log)")

head(insitu_asv_wClass) # from above, where I've classified each ASV by site and occurrence in sample type
## # A tibble: 6 × 34
## FeatureID SAMPLE value Taxon Domain Supergroup Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 00056209… Gorda… 8 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 2 00056209… Gorda… 13 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 3 00096455… Gorda… 91 Euka… Eukar… Rhizaria Radio… Acan… <NA> <NA> <NA>
## 4 000ee377… Axial… 282 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 5 000ee377… Axial… 32 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 6 00165708… Gorda… 1 Euka… Eukar… Stramenop… Ochro… Pela… Pela… Pelag… Pela…
## # … with 23 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, COORDINATES <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>,
## # SAMPLETYPE <chr>, YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>,
## # Mg <chr>, H2 <chr>, H2S <chr>, CH4 <chr>, ProkConc <chr>,
## # Sample_or_Control <chr>, DATASET <chr>, DECONTAM <chr>, CLASS <chr>,
## # SITE_CLASS <chr>
unique(insitu_asv_wClass$CLASS)## [1] "Vent only" "Background only"
## [3] "Vent & background" "Vent, plume, & background"
## [5] "Plume only" "Vent & plume"
## [7] "Plume & background"
unique(insitu_asv_wClass$SITE_CLASS)## [1] "Gorda Ridge only" "Axial only"
## [3] "Von Damm only" "Piccard & Von Damm"
## [5] "Piccard only" "MCR & Axial"
## [7] "Axial & Gorda Ridge" "Von Damm & Gorda Ridge"
## [9] "Von Damm, Axial, & Gorda Ridge" "Von Damm & Axial"
## [11] "All sites" "MCR & Gorda Ridge"
## [13] "Piccard, Axial, & Gorda Ridge" "Piccard & Gorda Ridge"
## [15] "Piccard & Axial"
unique(insitu_asv_wClass$SAMPLETYPE)## [1] "Vent" "Background" "Plume"
# Sequence totals and row counts per distribution class, MCR dataset only.
# NOTE(review): COUNT = n() counts rows of the long table (one row per ASV
# per sample), so an ASV found in several samples is counted more than once.
# Use n_distinct(FeatureID) if a count of unique ASVs is intended.
tmp <- insitu_asv_wClass %>%
  filter(DATASET == "MCR") %>%
  group_by(CLASS) %>%
  summarise(SEQ = sum(value),
            COUNT = n())
tmp
## # A tibble: 7 × 3
## CLASS SEQ COUNT
## <chr> <dbl> <int>
## 1 Background only 16529 383
## 2 Plume & background 93779 250
## 3 Plume only 24647 489
## 4 Vent & background 181232 818
## 5 Vent & plume 217335 478
## 6 Vent only 587304 3953
## 7 Vent, plume, & background 954469 3006
# Fractions are looked up from tmp rather than typed in by hand, so they stay
# correct if the table changes.
# Vent only
tmp$SEQ[tmp$CLASS == "Vent only"] / sum(tmp$SEQ) # 28%
## [1] 0.2829978
tmp$COUNT[tmp$CLASS == "Vent only"] / sum(tmp$COUNT)
## [1] 0.4215634
# Cosmo
tmp$SEQ[tmp$CLASS == "Vent, plume, & background"] / sum(tmp$SEQ)
## [1] 0.4599197
tmp$COUNT[tmp$CLASS == "Vent, plume, & background"] / sum(tmp$COUNT)
## [1] 0.3205716
Within the MCR dataset: 28% of sequences were vent-only; 42% of ASV occurrences (ASV-by-sample rows) were vent-only.
46% of sequences were cosmopolitan; 32% of ASV occurrences were cosmopolitan.
What Supergroups are associated with resident (vent-only) vs. cosmopolitan ASVs? What about specific sites?
# Shared preparation for the two plotting functions below.
# Keeps eukaryotes, fills in missing Supergroup/Phylum labels, splits
# Alveolata by Phylum, then averages `value` across rows sharing the same ASV,
# taxonomy, VENT, SITE, SAMPLETYPE, YEAR, DATASET and category.
# CATEGORY is a quosure created with enquo() in the caller.
.average_reps_bycategory <- function(df, CATEGORY) {
  df %>%
    filter(Domain == "Eukaryota") %>%
    mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
           Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
           Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
           Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
    group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
             VENT, SITE, SAMPLETYPE, YEAR, DATASET, !!CATEGORY) %>%
    summarise(SEQ_AVG_REP = mean(value)) %>%
    ungroup()
}

# Bar plots of sequence totals by category, at Supergroup level (top panel)
# and Supergroup-Phylum level (bottom panel).
#
# df       : long ASV table (e.g. insitu_asv_wClass).
# category : unquoted column to put on the x axis (e.g. CLASS, SITE_CLASS).
# position : passed to geom_bar(); "fill" gives proportions, "stack" totals.
#
# Returns the two panels combined in one column with patchwork.
make_bar_bycategory <- function(df, category, position){
  CATEGORY <- enquo(category)
  df_out <- .average_reps_bycategory(df, CATEGORY)
  # Only position = "fill" rescales the bars to proportions; otherwise the
  # axis shows summed sequence counts.
  y_label <- if (identical(position, "fill")) "Relative abundance" else "Total sequences"
  ## Supergroup
  supergroup <- df_out %>%
    group_by(Supergroup, !!CATEGORY) %>%
    summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
    ggplot(aes(x = !!CATEGORY, y = SEQ_SUM, fill = Supergroup)) +
    geom_bar(stat = "identity", position = position, color = "black", width = 0.9) +
    # facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
    theme_linedraw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          strip.background = element_blank(), strip.text = element_text(color = "black"),
          legend.position = "right") +
    scale_y_continuous(expand = c(0,0)) +
    # scale_fill_brewer(palette = "Set2") +
    scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
    labs(x = "", y = y_label)
  ## Phylum
  phylum <- df_out %>%
    unite(SupergroupPhylum, Supergroup, Phylum, sep = "-") %>%
    group_by(SupergroupPhylum, !!CATEGORY) %>%
    summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
    ggplot(aes(x = !!CATEGORY, y = SEQ_SUM, fill = SupergroupPhylum)) +
    geom_bar(stat = "identity", position = position, color = "black", width = 0.9) +
    # facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
    theme_linedraw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          strip.background = element_blank(), strip.text = element_text(color = "black"),
          legend.position = "right") +
    scale_y_continuous(expand = c(0,0)) +
    scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
                                 "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
                                 "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
                                 "#807dba", "#54278f", "#bdbdbd", "black", "white", "#969696", "#525252", "#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
                                 "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
                                 "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
                                 "#807dba", "#54278f", "#bdbdbd", "black", "white")) +
    labs(x = "", y = y_label)
  supergroup + phylum + patchwork::plot_layout(ncol = 1)
}

# Tile plot of log(sequence total) for each Supergroup (y) by category (x).
#
# df       : long ASV table (e.g. insitu_asv_wClass).
# category : unquoted column to put on the x axis.
# position : not used; kept so the call signature matches make_bar_bycategory().
#
# Returns a ggplot object.
make_tile_bycategory <- function(df, category, position){
  CATEGORY <- enquo(category)
  df_out <- .average_reps_bycategory(df, CATEGORY)
  ## Supergroup
  supergroup <- df_out %>%
    group_by(Supergroup, !!CATEGORY) %>%
    summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
    ggplot(aes(x = !!CATEGORY, fill = log(SEQ_SUM), y = Supergroup)) +
    geom_tile(color = "black") +
    # facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
    theme_linedraw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          strip.background = element_blank(), strip.text = element_text(color = "black"),
          legend.position = "right") +
    scale_fill_gradient(low = "#ffeda0", high = "#e31a1c", na.value = "grey50") +
    labs(x = "Distribution", y = "")
  supergroup
}

make_tile_bycategory(insitu_asv_wClass, CLASS, "fill")
# make_bar_bycategory(insitu_asv_wClass, CLASS, "fill")
make_bar_bycategory(insitu_asv_wClass, CLASS, "stack")
make_bar_bycategory(insitu_asv_wClass, SITE_CLASS, "fill")
make_tile_bycategory(insitu_asv_wClass, SITE_CLASS, "fill")
make_bar_bycategory(insitu_asv_wClass, SITE_CLASS, "stack")
head(insitu_asv_wClass)
## # A tibble: 6 × 34
## FeatureID SAMPLE value Taxon Domain Supergroup Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 00056209… Gorda… 8 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 2 00056209… Gorda… 13 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 3 00096455… Gorda… 91 Euka… Eukar… Rhizaria Radio… Acan… <NA> <NA> <NA>
## 4 000ee377… Axial… 282 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 5 000ee377… Axial… 32 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 6 00165708… Gorda… 1 Euka… Eukar… Stramenop… Ochro… Pela… Pela… Pelag… Pela…
## # … with 23 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, COORDINATES <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>,
## # SAMPLETYPE <chr>, YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>,
## # Mg <chr>, H2 <chr>, H2S <chr>, CH4 <chr>, ProkConc <chr>,
## # Sample_or_Control <chr>, DATASET <chr>, DECONTAM <chr>, CLASS <chr>,
## # SITE_CLASS <chr>
# Resident ("Vent only") vs. cosmopolitan ("Vent, plume, & background"):
# total sequences and number of distinct ASVs, one facet per measure.
categories <- c("Vent only", "Vent, plume, & background")
insitu_asv_wClass %>%
  filter(CLASS %in% categories) %>%
  mutate(CAT = case_when(
    CLASS == "Vent only" ~ "Resident",
    TRUE ~ "Cosmopolitan"
  )) %>%
  group_by(CAT) %>%
  # n_distinct(FeatureID) counts each ASV once; n() would count one row per
  # ASV per sample.
  summarise(SUM = sum(value),
            COUNT = n_distinct(FeatureID)) %>%
  pivot_longer(c(SUM, COUNT)) %>%
  # Label each measure so that the sequence facet is not described as ASVs.
  mutate(name = ifelse(name == "SUM", "Total sequences", "Number of ASVs")) %>%
  ggplot(aes(x = name, y = value, fill = CAT)) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  theme_linedraw() +
  facet_grid(name ~ ., scales = "free") +
  scale_fill_brewer(palette = "Dark2") +
  theme(legend.title = element_blank()) +
  labs(x = "", y = "")

head(insitu_asv_wClass)
## # A tibble: 6 × 34
## FeatureID SAMPLE value Taxon Domain Supergroup Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 00056209… Gorda… 8 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 2 00056209… Gorda… 13 Euka… Eukar… Stramenop… Sagen… <NA> <NA> <NA> <NA>
## 3 00096455… Gorda… 91 Euka… Eukar… Rhizaria Radio… Acan… <NA> <NA> <NA>
## 4 000ee377… Axial… 282 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 5 000ee377… Axial… 32 Euka… Eukar… Alveolata Cilio… Nass… Nass… Disco… NASS…
## 6 00165708… Gorda… 1 Euka… Eukar… Stramenop… Ochro… Pela… Pela… Pelag… Pela…
## # … with 23 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, COORDINATES <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>,
## # SAMPLETYPE <chr>, YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>,
## # Mg <chr>, H2 <chr>, H2S <chr>, CH4 <chr>, ProkConc <chr>,
## # Sample_or_Control <chr>, DATASET <chr>, DECONTAM <chr>, CLASS <chr>,
## # SITE_CLASS <chr>
unique(insitu_asv_wClass$CLASS)## [1] "Vent only" "Background only"
## [3] "Vent & background" "Vent, plume, & background"
## [5] "Plume only" "Vent & plume"
## [7] "Plume & background"
unique(insitu_asv_wClass$SITE_CLASS)## [1] "Gorda Ridge only" "Axial only"
## [3] "Von Damm only" "Piccard & Von Damm"
## [5] "Piccard only" "MCR & Axial"
## [7] "Axial & Gorda Ridge" "Von Damm & Gorda Ridge"
## [9] "Von Damm, Axial, & Gorda Ridge" "Von Damm & Axial"
## [11] "All sites" "MCR & Gorda Ridge"
## [13] "Piccard, Axial, & Gorda Ridge" "Piccard & Gorda Ridge"
## [15] "Piccard & Axial"
unique(insitu_asv_wClass$SAMPLETYPE)## [1] "Vent" "Background" "Plume"
# head(insitu_asv_wClass)
# Lollipop plot of total sequences per Supergroup for every distribution
# class (CLASS). Sequences are averaged across rows sharing the same ASV,
# SAMPLENAME and VENT before being summed.
# The SAMPLENAME splitting and REGION/VENTNAME/SAMPLE columns built here
# previously were never used by the summary or the plot, so they are omitted;
# the plotted values are unchanged.
insitu_asv_wClass %>%
  # filter(SITE %in% selection) %>%
  filter(Domain == "Eukaryota") %>%
  filter(!is.na(Supergroup)) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, Supergroup, Phylum, CLASS, SITE_CLASS) %>%
  summarise(AVG = mean(value)) %>%
  # filter(CLASS == "Vent only") %>%
  group_by(Supergroup, CLASS) %>%
  summarise(value = sum(AVG)) %>%
  ggplot(aes(x = CLASS, y = value, fill = Supergroup)) +
  geom_hline(yintercept = 0) +
  geom_segment(aes(x = CLASS, xend = CLASS,
                   y = 0, yend = value, color = Supergroup),
               lineend = "butt", size = 1) +
  geom_point(size = 2, shape = 19, aes(color = Supergroup)) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  scale_color_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  theme_bw() +
  facet_grid(. ~ Supergroup, scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, color = "black", size = 11),
        axis.text.y = element_text(color = "black", size = 12),
        panel.spacing.x = unit(0, "lines"), panel.spacing.y = unit(0, "lines"),
        panel.border = element_blank(),
        panel.grid = element_blank(),
        strip.background.x = element_blank(),
        strip.text = element_text(size = 11),
        legend.position = "none") +
  coord_flip() +
  # The vent-only filter above is disabled and x is CLASS, so the title
  # describes all distribution classes rather than vent-only by location.
  labs(x = "", y = "Total sequences", title = "Total sequences by Supergroup & distribution class")
# ?scale_fill_brewer# head(insitu_asv_wClass)
# Lollipop plot of the number of vent-only ASVs per Supergroup for every
# site distribution label (SITE_CLASS).
# The SAMPLENAME splitting and REGION/VENTNAME/SAMPLE columns built here
# previously were never used by the summary or the plot, so they are omitted.
insitu_asv_wClass %>%
  # filter(SITE %in% selection) %>%
  filter(Domain == "Eukaryota") %>%
  filter(!is.na(Supergroup)) %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, Supergroup, Phylum, CLASS, SITE_CLASS) %>%
  summarise(AVG = mean(value)) %>%
  filter(CLASS == "Vent only") %>%
  group_by(Supergroup, SITE_CLASS) %>%
  # Count each ASV once. The table has one row per ASV per sample at this
  # point, so n() would count an ASV once for every sample it occurs in.
  summarise(value = n_distinct(FeatureID)) %>%
  ggplot(aes(x = SITE_CLASS, y = value, fill = Supergroup)) +
  geom_hline(yintercept = 0) +
  geom_segment(aes(x = SITE_CLASS, xend = SITE_CLASS,
                   y = 0, yend = value, color = Supergroup),
               lineend = "butt", size = 1) +
  geom_point(size = 2, shape = 19, aes(color = Supergroup)) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  scale_color_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  theme_bw() +
  facet_grid(. ~ Supergroup, scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, color = "black", size = 11),
        axis.text.y = element_text(color = "black", size = 12),
        panel.spacing.x = unit(0, "lines"), panel.spacing.y = unit(0, "lines"),
        panel.border = element_blank(),
        panel.grid = element_blank(),
        strip.background.x = element_blank(),
        strip.text = element_text(size = 11),
        legend.position = "none") +
  coord_flip() +
  labs(x = "", y = "Total ASVs", title = "Number of 'vent-only' ASVs by Supergroup & location")
# ?scale_fill_brewer
Here, I've isolated almost 800,000 sequences belonging to the putative endemic (vent-only) ASVs, totaling 3789 ASVs. This subset keeps only ASV records with 10 or more sequences in a sample (a threshold to reduce noise).
# Putative endemic records: vent-only rows with a known, non-Opisthokonta
# Supergroup and more than 9 sequences. All conditions must hold for a row
# to be kept.
endemic <- filter(
  insitu_asv_wClass,
  Supergroup != "Opisthokonta",
  CLASS == "Vent only",
  value > 9,
  !is.na(Supergroup)
)
# Sum of putative endemic sequences and ASVs
sum(endemic$value)
## [1] 795686
length(unique(endemic$FeatureID))
## [1] 3789
Tile plot by Class level? CLR? Coord flip below and add environmental data as heatmap along side? Combine years from Axial, group by site? Do a better compilation of taxa… additional thresholds?
# Alveolate groups that are pooled into "Other Alveolata" below.
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")

# Wide table of putative endemic sequences: one row per taxonomic label
# ("SUPERGROUP_PHYLUM_CLASS"), one column per sample ("SAMPLENAME_VENT"),
# zero-filled.
# NOTE: the CLASS column of `endemic` (the habitat distribution label) is
# overwritten below with a cleaned taxonomic Class label.
endemic_processed <- endemic %>%
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>%
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  mutate(PHYLUM = case_when(
    Phylum == "Unknown" ~ paste(SUPERGROUP, "Other"),
    grepl("_X", Phylum) ~ paste(SUPERGROUP, "Other"),
    is.na(Phylum) ~ paste(SUPERGROUP, "Other"),
    TRUE ~ Phylum
  )) %>%
  mutate(CLASS = case_when(
    Class == "Unknown" ~ PHYLUM,
    grepl("_X", Class) ~ PHYLUM,
    # Fall back to the cleaned PHYLUM label, like the two branches above.
    # Using the raw Phylum here let "Unknown" and "*_X" names through, and
    # the "_" in those names breaks the "_"-delimited label built below.
    is.na(Class) ~ PHYLUM,
    grepl("MAST-", Class) ~ "MAST",
    TRUE ~ Class
  )) %>%
  filter(SUPERGROUP != "Archaeplastida") %>%
  # Average across replicates
  group_by(FeatureID, SAMPLENAME, VENT, Domain, SUPERGROUP, PHYLUM, CLASS, Order, Family, Genus, Species) %>%
  summarise(AVG = mean(value)) %>%
  ungroup() %>%
  filter(!is.na(SUPERGROUP)) %>%
  # Sum to the Class taxonomic classification
  unite(SAMPLENAME_2, SAMPLENAME, VENT, sep = "_") %>%
  group_by(SAMPLENAME_2, SUPERGROUP, PHYLUM, CLASS) %>%
  summarise(CLASS_SUM = sum(AVG)) %>%
  # Drop the leftover grouping before the grouping columns are merged
  ungroup() %>%
  unite(CLASS, SUPERGROUP, PHYLUM, CLASS, sep = "_") %>%
  select(CLASS, SAMPLENAME_2, CLASS_SUM) %>%
  pivot_wider(names_from = SAMPLENAME_2, values_from = CLASS_SUM, values_fill = 0) %>%
  column_to_rownames(var = "CLASS")
# head(endemic_processed)
## Take wide data frame and CLR transform, pivot to long, and plot
# svg("tileplot-endemic-bysample.svg", h = 6, w = 20)
# CLR-transform the wide endemic table, reshape it to long form, rebuild the
# sample metadata from the column names and draw a tile plot of CLR values
# (taxonomic Class by sample, faceted by SITE and by Supergroup + Phylum).
# NOTE(review): endemic_processed has taxa as row names and samples as
# columns; confirm that compositions::clr() is applied along the intended
# dimension (it presumably treats each row as one composition - TODO confirm).
data.frame(compositions::clr(endemic_processed)) %>%
rownames_to_column(var = "CLASS") %>%
# `all` is defined outside this chunk; presumably a vector of column-name
# prefixes that matches every sample column - TODO confirm.
pivot_longer(cols = starts_with(all), values_to = "CLR", names_to = "SAMPLENAME_2") %>%
# Column names were built as SAMPLENAME and VENT joined with "_"
separate(SAMPLENAME_2, c("SAMPLENAME", "VENT"), sep = "_") %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
# Dots in VENT become spaces; presumably data.frame() replaced the spaces in
# the column names with "." - TODO confirm.
mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
mutate(REGION = case_when(
SITE == "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
# Unlike the other plots in this section, Axial vent names omit YEAR here
mutate(VENTNAME = case_when(
REGION == "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ VENT
# REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
)) %>% select(-Sample_tmp) %>%
unite(SAMPLE, SITE, VENTNAME, sep = " ", remove = FALSE) %>%
# Split the combined row label back into its three taxonomic levels; any
# label with extra "_" characters is truncated here (see the warning below).
separate(CLASS, c("Supergroup", "Phylum", "Class"), sep = "_", remove = FALSE) %>%
ggplot(aes(x = SAMPLE, y = Class, fill = CLR)) +
geom_tile(color = "#252525") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black",size = 8),
axis.text.y = element_text(color = "black", size = 8),
strip.background = element_blank(),
strip.text.y = element_text(hjust = 0, vjust = 0.5, angle = 0),
legend.title = element_blank(),
strip.placement = "outside") +
labs(x = "", y = "") +
# After the flip, samples run down the vertical axis and Class along the
# horizontal axis
coord_flip() +
# scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027", na.value = "grey50") +
# Binned diverging palette centred on CLR = 0
scale_fill_steps2(
low = "#2166ac",
mid = "white",
high = "#b2182b",
midpoint = 0,
space = "Lab",
na.value = "#4d4d4d",
guide = "coloursteps",
aesthetics = "fill"
) +
facet_grid(SITE ~ Supergroup + Phylum, space = "free", scales = "free", switch = "both")## Warning: Expected 4 pieces. Additional pieces discarded in 2752 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 3 pieces. Additional pieces discarded in 224 rows [33, 34, 35,
## 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, ...].
# dev.off()
# ?scale_fill_steps2()
# "Alveolata-<Phylum>" labels that the taxonomy clean-up pools into
# "Other Alveolata".
alv <- c(
  "Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown",
  "Alveolata-Chrompodellids", "Alveolata-Apicomplexa"
)
# Phylum selection passed to plot_endemic_fingerprint().
ciliate <- c("Ciliophora")
# Plot a per-ASV CLR heatmap ("fingerprint") of the vent-endemic ASVs that
# belong to the selected phyla.
#
# Arguments:
#   threshold  numeric; only replicate-averaged abundances strictly greater
#              than this value are kept.
#   selection  character vector of cleaned PHYLUM labels to keep
#              (e.g. `ciliate`).
# Returns: a ggplot object; one tile per sample x ASV, filled by CLR value and
#          faceted by SITE (rows) and Phylum (columns).
# Reads `endemic`, `alv`, `all` and `mcr` from the enclosing environment.
plot_endemic_fingerprint <- function(threshold, selection) {
  endemic_processed_asv <- endemic %>%
    filter(Domain == "Eukaryota") %>%
    # `Supergroup != "Opisthokonta"` is NA for unassigned supergroups and
    # filter() drops NA rows; keep them so the relabelling below applies.
    filter(is.na(Supergroup) | Supergroup != "Opisthokonta") %>%
    mutate(
      Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
      Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
      Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
      Supergroup = ifelse(
        Supergroup == "Alveolata",
        paste(Supergroup, Phylum, sep = "-"),
        Supergroup
      )
    ) %>%
    mutate(SUPERGROUP = case_when(
      Supergroup %in% alv ~ "Other Alveolata",
      Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
      Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
      Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
      Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
      Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
      Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
      TRUE ~ Supergroup
    )) %>%
    # Phylum cannot be NA here (replaced with "Unknown" above).
    mutate(PHYLUM = case_when(
      Phylum == "Unknown" ~ paste(SUPERGROUP, "Other"),
      grepl("_X", Phylum) ~ paste(SUPERGROUP, "Other"),
      TRUE ~ Phylum
    )) %>%
    mutate(CLASS = case_when(
      Class == "Unknown" ~ PHYLUM,
      grepl("_X", Class) ~ PHYLUM,
      # Use the cleaned PHYLUM label rather than the raw Phylum.
      is.na(Class) ~ PHYLUM,
      grepl("MAST-", Class) ~ "MAST",
      TRUE ~ Class
    )) %>%
    # Average across replicates
    group_by(
      FeatureID, SAMPLENAME, VENT, Domain, SUPERGROUP, PHYLUM, CLASS,
      Order, Family, Genus, Species
    ) %>%
    summarise(AVG = mean(value)) %>%
    ungroup() %>%
    filter(!is.na(SUPERGROUP)) %>%
    filter(!is.na(PHYLUM)) %>%
    filter(AVG > threshold) %>%
    filter(PHYLUM %in% selection) %>%
    # Sum per ASV (FeatureID) within each sample
    unite(SAMPLENAME_2, SAMPLENAME, VENT, sep = "_") %>%
    group_by(SAMPLENAME_2, SUPERGROUP, PHYLUM, FeatureID) %>%
    summarise(SUM = sum(AVG)) %>%
    # select() always keeps grouping columns, so a leftover SUPERGROUP
    # grouping would ride along into the matrix handed to clr() as a
    # non-abundance column. Ungroup first so only ASV x sample values remain.
    ungroup() %>%
    unite(PHYLUM_ASV, PHYLUM, FeatureID, sep = "_") %>%
    select(PHYLUM_ASV, SAMPLENAME_2, SUM) %>%
    pivot_wider(names_from = SAMPLENAME_2, values_from = SUM, values_fill = 0) %>%
    column_to_rownames(var = "PHYLUM_ASV")
  ###
  # NOTE(review): compositions::clr() is documented as treating each row as
  # one composition; rows here are ASVs -- confirm the intended direction.
  # data.frame() makes the sample column names syntactic, hence the
  # dot-to-space replacement on VENT below.
  data.frame(compositions::clr(endemic_processed_asv)) %>%
    rownames_to_column(var = "PHYLUM_ASV") %>%
    pivot_longer(cols = starts_with(all), values_to = "CLR", names_to = "SAMPLENAME_2") %>%
    separate(SAMPLENAME_2, c("SAMPLENAME", "VENT"), sep = "_") %>%
    # Surplus SAMPLENAME pieces are not used (Sample_tmp is removed below);
    # discard them explicitly instead of through a per-row warning.
    separate(
      SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
      remove = TRUE, extra = "drop"
    ) %>%
    mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial"
    )) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ VENT,
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ VENT
      # REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
    )) %>%
    select(-Sample_tmp) %>%
    unite(SAMPLE, SITE, VENTNAME, sep = " ", remove = FALSE) %>%
    separate(PHYLUM_ASV, c("Phylum", "FeatureID"), sep = "_", remove = FALSE) %>%
    ggplot(aes(x = SAMPLE, y = FeatureID, fill = CLR)) +
    geom_tile() +
    theme(
      legend.position = "right",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.text.x = element_blank(),
      # axis.text.y = element_blank(),
      axis.text.y = element_text(color = "black", size = 8),
      strip.background = element_blank(),
      strip.text.y = element_text(hjust = 0, vjust = 0.5, angle = 0),
      legend.title = element_blank(),
      axis.ticks = element_blank(),
      strip.placement = "outside"
    ) +
    labs(x = "", y = "") +
    coord_flip() +
    # scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027", na.value = "grey50") +
    # Diverging binned palette centred on CLR = 0.
    scale_fill_steps2(
      low = "#2166ac",
      mid = "white",
      high = "#b2182b",
      midpoint = 0,
      space = "Lab",
      na.value = "#4d4d4d",
      guide = "coloursteps",
      aesthetics = "fill"
    ) +
    facet_grid(SITE ~ Phylum, space = "free", scales = "free", switch = "both")
}
## Take wide data frame and CLR transform, pivot to long, and plot
# svg("tileplot-endemic-bysample.svg", h = 6, w = 20)
# Ciliate fingerprint, keeping only ASVs whose averaged abundance exceeds 1.
plot_endemic_fingerprint(1, ciliate)
## Warning: Expected 4 pieces. Additional pieces discarded in 29408 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# dev.off()
# List the raw phylum labels present in `endemic`.
unique(endemic$Phylum)
## [1] "Sagenista" "Ciliophora" "Cercozoa"
## [4] "Radiolaria" "Ochrophyta" "Dinoflagellata"
## [7] "Stramenopiles_X" "Pseudofungi" "Haptophyta"
## [10] NA "Perkinsea" "Opalozoa"
## [13] "Telonemia" "Lobosa" "Chlorophyta"
## [16] "Picozoa" "Centroheliozoa" "Apusomonadidae"
## [19] "Hilomonadea" "Patescibacteria" "Chrompodellids"
## [22] "Katablepharidophyta" "Prasinodermophyta" "Metamonada"
## [25] "Conosa" "Proteobacteria" "Cryptophyta"
## [28] "Apicomplexa" "Euryarchaeota" "Alveolata_X"
## [31] "Rhodophyta" "Protalveolata_X" "Discoba"
## [34] "Breviatea" "Aquificae" "Streptophyta"
# Same ciliate fingerprint with the abundance cutoff lowered to 0
# (keeps every ASV whose averaged abundance is greater than 0).
plot_endemic_fingerprint(0, ciliate)## Warning: Expected 4 pieces. Additional pieces discarded in 29408 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Pending: take the CLR-transformed data to a triangle plot? Alternatively, build the triangle plot from relative abundance. Are there distinct signatures of vent endemics by region? Are there clusters on the triangle plot?
Subset dataset to create endemic dataset and a vent inclusive dataset.
make_bar_relabun(endemic, all)
PCA analysis of the resident population within each site. This reinforces what is seen in the other plots: some vent sites are more similar to one another than to others.
# Draw make_pca() on `endemic` for four site selections and arrange the
# panels in a two-column grid. `axial`, `mcr`, `gr` and `all` are defined
# elsewhere in this file (presumably the site vectors for each region and for
# all regions combined -- confirm against their definitions).
plot_grid(
make_pca(endemic, axial),
make_pca(endemic, mcr),
make_pca(endemic, gr),
make_pca(endemic, all),
ncol = 2
)## Warning: Expected 4 pieces. Additional pieces discarded in 2505 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 2274 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 1372 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6151 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# make_pca(endemic, all)# colnames(endemic)
# Placeholder entries that mark a measurement as unavailable; rows carrying
# them are removed before the values are converted to numeric.
# NOTE(review): as a variable this name shadows base::rm; it is kept because
# endemic_env() below refers to it.
rm <- c("-", "", "nd", "bd", NA)
# x <- c("TEMP")
# Heat strip of one environmental measurement across the samples in `endemic`.
#
# Arguments:
#   x  character(1); name of the measurement column to show. Must be one of
#      the columns spanned by TEMP:ProkConc (e.g. "TEMP", "pH").
# Returns: a ggplot object with one tile per sample, filled by the numeric
#          value and faceted by SITE. Sample labels and strips are blanked so
#          several strips can be placed side by side.
# Reads `endemic`, `mcr` and `rm` from the enclosing environment.
endemic_env <- function(x) {
  endemic %>%
    # The unite/separate round trip and the dot replacement mirror the
    # heatmap pipelines -- presumably so SAMPLE labels line up with them.
    unite(SAMPLENAME_2, SAMPLENAME, VENT, sep = "_") %>%
    separate(SAMPLENAME_2, c("SAMPLENAME", "VENT"), sep = "_") %>%
    # Surplus SAMPLENAME pieces are unused (Sample_tmp is removed below);
    # discard them explicitly instead of through a per-row warning.
    separate(
      SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
      remove = TRUE, extra = "drop"
    ) %>%
    mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
    mutate(REGION = case_when(
      SITE == "GordaRidge" ~ "Gorda Ridge",
      SITE %in% mcr ~ "Mid-Cayman Rise",
      SITE == "Axial" ~ "Axial"
    )) %>%
    mutate(VENTNAME = case_when(
      REGION == "Gorda Ridge" ~ VENT,
      REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
      REGION == "Axial" ~ VENT
    )) %>%
    select(-Sample_tmp) %>%
    unite(SAMPLE, SITE, VENTNAME, sep = " ", remove = FALSE) %>%
    select(SAMPLE, SITE, VENTNAME, DEPTH:ProkConc) %>%
    pivot_longer(cols = TEMP:ProkConc, names_to = "MEASUREMENT", values_to = "VALUE") %>%
    filter(MEASUREMENT == x) %>%
    filter(!(VALUE %in% rm)) %>%
    mutate(VALUE = as.numeric(as.character(VALUE))) %>%
    # Every ASV row repeats the sample metadata; keep one row per value.
    distinct() %>%
    ggplot(aes(x = SAMPLE, y = MEASUREMENT, fill = VALUE)) +
    geom_tile() +
    coord_flip() +
    # `scales` spelled out in full (previously matched partially via `scale`).
    facet_grid(SITE ~ MEASUREMENT, switch = "both", space = "free", scales = "free") +
    theme_linedraw() +
    theme(
      axis.text.y = element_blank(),
      strip.text = element_blank(),
      strip.background = element_blank(),
      axis.ticks = element_blank(),
      strip.placement = "outside",
      legend.title = element_blank(),
      legend.position = "top",
      legend.text = element_text(size = 5),
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid = element_blank()
    ) +
    labs(x = "", y = "") +
    # direction = 1 keeps the palette in its native light-to-dark order; the
    # previous value 2 is not a documented option and behaved the same way.
    scale_fill_distiller(palette = "Reds", direction = 1, na.value = "grey50")
  # viridis::scale_fill_viridis(option = "plasma")
}
##bdc3c7 → #2c3e50
# ?scale_fill_viridis
# endemic_env("TEMP")
# ?scale_fill_distiller# colnames(endemic)
# svg("env-heatmap.svg", w = 12, h = 4)
# One heat strip per environmental parameter, laid out in a single row.
# The temperature strip comes first, is five times wider, and is the only one
# that shows the sample labels and SITE strips.
plot_grid(
  plotlist = c(
    list(
      endemic_env("TEMP") +
        theme(
          axis.text.y = element_text(color = "black"),
          strip.text.y = element_text(color = "black"),
          strip.placement = "outside"
        )
    ),
    # endemic_env("NO3") is intentionally left out.
    lapply(
      c("PercSeawater", "pH", "Mg", "H2", "CH4", "H2S", "ProkConc"),
      endemic_env
    )
  ),
  nrow = 1,
  rel_widths = c(5, rep(1, 7))
)
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 4 pieces. Additional pieces discarded in 6252 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# dev.off()
Import the grazing data (output from the previous analysis) and plot it with bubbles of specific parameters underneath.
?vegan::adonis
# distance matrix from vent-only ASV profiles and environmental parameters
# QUESTIONS
## What happens with NAs?
## Subset so I have as many samples and environmental parameters?
# Restore the saved ASV tables into the workspace; verbose = TRUE prints the
# names of the objects being loaded (captured below).
load("asv-tables-processed-18102021.RData", verbose = TRUE)
## Loading objects:
## asv_insitu
## asv_insitu_qc
## insitu_asv_wClass
# head(asv_insitu_qc)
# head(asv_insitu_qc %>% select(SAMPLENAME, TEMP, pH, Mg, ProkConc) %>% distinct()
# Inspect the columns available in the classified in situ ASV table.
colnames(insitu_asv_wClass)
## [1] "FeatureID" "SAMPLE" "value"
## [4] "Taxon" "Domain" "Supergroup"
## [7] "Phylum" "Class" "Order"
## [10] "Family" "Genus" "Species"
## [13] "Consensus" "SAMPLENAME" "VENT"
## [16] "COORDINATES" "SITE" "SAMPLEID"
## [19] "DEPTH" "SAMPLETYPE" "YEAR"
## [22] "TEMP" "pH" "PercSeawater"
## [25] "Mg" "H2" "H2S"
## [28] "CH4" "ProkConc" "Sample_or_Control"
## [31] "DATASET" "DECONTAM" "CLASS"
## [34] "SITE_CLASS"
# Per-sample table of environmental parameters.
# Result: a data frame with one row per cleaned SAMPLE label (row names) and
# one numeric column per measurement spanned by TEMP:ProkConc, holding the
# mean of the available values (NA when a sample has none).
# Reads `insitu_asv_wClass` and `mcr` from the enclosing environment.
vent_metadata <- insitu_asv_wClass %>%
  filter(Domain == "Eukaryota") %>%
  # Surplus SAMPLENAME pieces are unused (Sample_tmp is removed below);
  # discard them explicitly instead of through a per-row warning.
  separate(
    SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"),
    remove = TRUE, extra = "drop"
  ) %>%
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial"
  )) %>%
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>%
  select(-Sample_tmp) %>%
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
  # Tidy the display label. The replacements are order-dependent, so they are
  # kept in their original sequence.
  mutate(SAMPLE = gsub(SAMPLE, pattern = "_", replacement = " ")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Axial", replacement = "Axial")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent GordaRidge", replacement = "Gorda Ridge")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "GordaRidge", replacement = "Gorda Ridge")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume ", replacement = "")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Piccard Piccard", replacement = "Piccard")) %>%
  # NOTE(review): "GordaRidge" has already been rewritten to "Gorda Ridge"
  # above, so this pattern can no longer match -- confirm what was intended.
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Background GordaRidge", replacement = "Gorda Ridge")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "VonDamm VonDamm", replacement = "Von Damm")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Piccard Piccard", replacement = "Piccard")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = " BSW", replacement = "")) %>%
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")) %>%
  pivot_longer(cols = TEMP:ProkConc, names_to = "MEASUREMENT", values_to = "VALUE") %>%
  # The measurement columns hold text, so mean() on them returned NA for
  # every cell (with one warning per group). Turn the placeholder entries
  # into NA and convert the rest to numeric before averaging.
  mutate(
    VALUE = as.character(VALUE),
    VALUE = as.numeric(ifelse(VALUE %in% c("-", "", "nd", "bd"), NA, VALUE))
  ) %>%
  # Average across replicates (and across the repeated per-ASV rows)
  group_by(SAMPLE, MEASUREMENT) %>%
  summarise(
    # Return NA rather than NaN when a sample has no usable value.
    MEAN = if (all(is.na(VALUE))) NA_real_ else mean(VALUE, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  distinct() %>%
  pivot_wider(names_from = "MEASUREMENT", values_from = "MEAN") %>%
  column_to_rownames(var = "SAMPLE")
## Warning: Expected 4 pieces. Additional pieces discarded in 26125 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
## Warning in mean.default(VALUE): argument is not numeric or logical: returning NA
# Build `vent_asvs`: a wide table with one row per FeatureID (stored as row
# names) and one column per cleaned-up sample label, holding the mean of
# `value` for that label. Only rows with Domain == "Eukaryota" are used.
vent_asvs <- insitu_asv_wClass %>%
filter(Domain == "Eukaryota") %>%
# Split SAMPLENAME into site / sample type / year. Only the first four pieces
# are kept (see the recorded "Additional pieces discarded" warning below) and
# the fourth piece is dropped again a few steps later.
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
# `mcr` is defined outside this chunk; presumably the Mid-Cayman Rise site
# names - confirm. Sites matching none of the branches get REGION = NA.
mutate(REGION = case_when(
SITE == "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
# Region-specific vent label: VENT alone, SITE + VENT, or VENT + YEAR.
mutate(VENTNAME = case_when(
REGION == "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
)) %>% select(-Sample_tmp) %>%
# Assemble the sample label, then tidy it with a chain of substitutions.
# Order matters: each gsub() operates on the output of the previous one.
unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "_", replacement = " ")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Axial", replacement = "Axial")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume ", replacement = "")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Piccard Piccard", replacement = "Piccard")) %>%
# NOTE(review): every "GordaRidge" has already been rewritten to "Gorda Ridge"
# by the substitutions above, so this pattern can no longer match.
mutate(SAMPLE = gsub(SAMPLE, pattern = "Background GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "VonDamm VonDamm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Piccard Piccard", replacement = "Piccard")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = " BSW", replacement = "")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")) %>%
# Average `value` over all rows that share a label and FeatureID (replicates)
group_by(SAMPLE, FeatureID) %>%
summarise(MEAN = mean(value)) %>%
ungroup() %>%
# One column per sample label; ASVs absent from a label are filled with 0
pivot_wider(names_from = "SAMPLE", values_from = "MEAN", values_fill = 0) %>%
column_to_rownames(var = "FeatureID")## Warning: Expected 4 pieces. Additional pieces discarded in 26125 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(vent_asvs)
# ?vegdist()
# vent_dist <- vegdist(vent_asvs, method = "jaccard")
# class(vent_dist)# tmp <- adonis(vent_dist ~ TEMP*ProkConc, data = vent_metadata, permutations = 99)
# head(vent_asvs)
# adonis(dune ~ Management * A1, data = dune.env, permutations = 99)
# class(dune.env)
# ?adonis()
Plot sequence relative abundance by temperature and prokaryote concentration. These two parameters were chosen because they have the most complete metadata. Samples that were not countable or had no temperature record were removed.
# asv_insitu_qc %>%
# # filter(SITE %in% selection) %>%
# filter(!is.na(TEMP)) %>%
# filter(!is.na(ProkConc)) %>%
# filter(Domain == "Eukaryota") %>%
# mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
# Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
# Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
# Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
# group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
# VENT, SITE, SAMPLETYPE, YEAR, DATASET, TEMP, ProkConc) %>%
# summarise(SEQ_AVG_REP = mean(value)) %>%
# ungroup() %>%
# unite(SupergroupPhylum, Supergroup, Phylum, sep = "-") %>%
# group_by(SITE, SAMPLETYPE, VENT, YEAR, SupergroupPhylum, TEMP, ProkConc) %>%
# summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
# ggplot(aes(x = ProkConc, y = as.numeric(TEMP), fill = SITE, shape = SAMPLETYPE)) +
# geom_point(color = "black", aes(size = SEQ_SUM)) +
# scale_size_continuous(range = c(4,9)) +
# scale_shape_manual(values = c(21, 23, 24)) +
# scale_x_log10() +
# facet_wrap(SupergroupPhylum ~., scale = "free") +
# theme_linedraw() +
# theme(axis.text = element_text(color = "black", size = 12),
# strip.background = element_blank(), strip.text = element_text(color = "black"),
# legend.position = "right") +
# # scale_y_continuous(expand = c(0,0)) +
# scale_fill_manual(values = c("#feb24c", "#addd8e", "#de2d26", "#1c9099")) +
# guides(fill = guide_legend(override.aes = list(shape = c(21))),
# shape = guide_legend(override.aes = list(fill = "black"))) +
# labs(x = bquote("Cells "~mL^-1 ~hr^-1), y = "Temperature (C)")
Repeat the above plot, but with CLR-transformed data.
# Build the wide ASV-by-sample table that feeds the CLR transform.
# Rows are keyed by "FeatureID Supergroup Phylum"; columns are keyed by
# "SAMPLENAME_VENT_TEMP_ProkConc" so the sample metadata can be recovered from
# the column names afterwards.
df_wide_tmp <- asv_insitu_qc %>%
  # Eukaryotic, non-zero records that have both a temperature and a
  # prokaryote concentration
  filter(
    !is.na(TEMP),
    !is.na(ProkConc),
    Domain == "Eukaryota",
    value > 0
  ) %>%
  # Average across replicates
  group_by(
    FeatureID, SAMPLENAME, VENT,
    Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
    TEMP, ProkConc
  ) %>%
  summarise(AVG = mean(value)) %>%
  ungroup() %>%
  # Collapse sample metadata and taxonomy into single label columns
  unite(SAMPLENAME_2, SAMPLENAME, VENT, TEMP, ProkConc, sep = "_") %>%
  unite(TAX, FeatureID, Supergroup, Phylum, sep = " ") %>%
  select(TAX, SAMPLENAME_2, AVG) %>%
  # One column per sample label; missing ASV/sample combinations become 0
  pivot_wider(
    names_from = SAMPLENAME_2,
    values_from = AVG,
    values_fill = 0
  ) %>%
  column_to_rownames(var = "TAX")
## Take the wide data frame, CLR transform it, pivot back to long format and
## rebuild the sample / taxonomy columns for plotting.
# NOTE(review): compositions::clr() is applied to a table whose rows are ASVs
# and whose columns are samples; it appears to treat each row as one
# composition, which would centre each ASV across samples rather than each
# sample across ASVs - confirm this is the intended orientation.
# NOTE(review): data.frame() uses check.names = TRUE by default, so column
# labels containing characters such as spaces, "-" or "+" are rewritten with
# "."; only VENT is repaired below. This may explain the "NAs introduced by
# coercion" warning when TEMP / ProkConc are later passed to as.numeric() -
# confirm.
clr_long_df <- data.frame(compositions::clr(df_wide_tmp)) %>%
rownames_to_column(var = "TAX") %>%
# `all` is not defined in this chunk; presumably a character vector of
# sample-name prefixes created earlier in the file - confirm.
pivot_longer(cols = starts_with(all), values_to = "CLR", names_to = "SAMPLENAME_2") %>%
# Undo the "_"-joined column label built for the wide table
separate(SAMPLENAME_2, c("SAMPLENAME", "VENT", "TEMP", "ProkConc"), sep = "_") %>%
# Keep the first four pieces of SAMPLENAME; Sample_tmp is dropped below
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
# Replace every "." in VENT with a space
mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
# `mcr` is defined outside this chunk; presumably the Mid-Cayman Rise site
# names - confirm. Sites matching no branch get REGION = NA.
mutate(REGION = case_when(
SITE == "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
# Region-specific vent label: VENT alone, SITE + VENT, or VENT + YEAR
mutate(VENTNAME = case_when(
REGION == "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
)) %>% select(-Sample_tmp) %>%
unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
# Split the row key back into ASV id and taxonomy, then build the facet label
separate(TAX, c("ASVid","Supergroup", "Phylum"), sep = " ", remove = TRUE) %>%
unite(SupergroupPhylum, Supergroup, Phylum, sep = "-")## Warning: Expected 4 pieces. Additional pieces discarded in 230640 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(clr_long_df)
## Plot
# Vent samples only: temperature vs prokaryote concentration, one facet per
# Supergroup-Phylum, points filled by CLR value and shaped by region.
clr_long_df %>%
  filter(SAMPLETYPE == "Vent") %>%
  ggplot(aes(
    x = as.numeric(ProkConc),
    y = as.numeric(TEMP),
    fill = CLR,
    shape = REGION
  )) +
  # fill and shape are inherited from the plot-level aesthetics
  geom_point(color = "black", size = 3) +
  scale_fill_gradient2(
    low = "#4575b4", mid = "white", high = "#d73027",
    na.value = "grey50"
  ) +
  scale_shape_manual(values = c(21, 23, 24, 25)) +
  scale_x_log10() +
  facet_wrap(SupergroupPhylum ~ ., scales = "free") +
  theme_linedraw() +
  theme(
    axis.text = element_text(color = "black", size = 12),
    strip.background = element_blank(),
    strip.text = element_text(color = "black"),
    legend.position = "right"
  ) +
  guides(
    fill = guide_legend(override.aes = list(shape = c(21))),
    shape = guide_legend(override.aes = list(fill = "black"))
  ) +
  labs(x = bquote("Cells "~mL^-1 ~hr^-1), y = "Temperature (C)")
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 44640 rows containing missing values (geom_point).
Diversity model/estimation and network analysis to be run on HPC.
load("asv-tables-processed-18102021.RData", verbose = T)DivNet package - diversity estimation hypothesis testing from Amy Willis’s group. This will also characterize the uncertainty of the richness estimate. Richness estimation is flawed because of sample depth and processing methods.
library(phyloseq); library(breakaway); library(DivNet)
library(tidyverse)This code block run on HPC.
# Prepare the three inputs phyloseq needs for the DivNet analysis:
#   insitu_asv_matrix - ASV (rows) x sample (columns) values
#   insitu_tax_matrix - ASV (rows) x taxonomic rank (columns)
#   metadata_insitu   - one row per sample, row names = SAMPLE

# Select eukaryotes only and create wide format dataframe
insitu_wide <- asv_insitu_qc %>%
  filter(Domain == "Eukaryota") %>%
  filter(!grepl("_Plume001_", SAMPLE)) %>% # removing "near vent background", not relevant in other data sets
  select(FeatureID, Taxon, SAMPLE, value) %>%
  pivot_wider(names_from = SAMPLE, values_from = value, values_fill = 0)
# head(insitu_wide)

# Sample names are every column except the two identifier columns
insitu_samples <- as.character(colnames(insitu_wide %>% select(-Taxon, -FeatureID)))
# insitu_samples

# Taxonomy matrix: split the ";"-delimited Taxon string into eight ranks.
# The assignment below had been fused into the preceding comment line, which
# left `insitu_tax_matrix` undefined for the statements that use it later.
insitu_tax_matrix <- insitu_wide %>%
  select(FeatureID, Taxon) %>%
  separate(Taxon, c("Domain", "Supergroup",
                    "Phylum", "Class", "Order",
                    "Family", "Genus", "Species"), sep = ";") %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix()
## Warning: Expected 8 pieces. Additional pieces discarded in 6222 rows [3, 4, 6,
## 7, 9, 10, 11, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 29, 32, ...].
## Warning: Expected 8 pieces. Missing pieces filled with `NA` in 4264 rows [1, 2,
## 5, 8, 13, 14, 16, 19, 21, 26, 30, 31, 33, 40, 41, 45, 46, 47, 48, 50, ...].

# ASV matrix: everything except the Taxon string, keyed by FeatureID
insitu_asv_matrix <- insitu_wide %>%
  select(-Taxon) %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix()

# Align row names for each matrix
rownames(insitu_tax_matrix) <- row.names(insitu_asv_matrix)

## Extract relevant metadata information
# head(metadata)
metadata_insitu <- metadata %>%
  filter(SAMPLE %in% insitu_samples) %>% # from reformatting df above
  select(SAMPLE, VENT, SITE, SAMPLETYPE, YEAR) %>%
  # Combined labels used as DivNet covariates further down
  unite(SAMPLELABEL, VENT, SITE, SAMPLETYPE, YEAR, sep = "_", remove = FALSE) %>%
  unite(TYPE_SITE, SITE, SAMPLETYPE, sep = "_", remove = FALSE)
rownames(metadata_insitu) <- metadata_insitu$SAMPLE
# View(metadata_insitu)
# head(metadata_insitu)
# row.names(metadata_insitu)
Import taxa and ASV count matrices into phyloseq objects.
# Wrap the ASV and taxonomy matrices as phyloseq components
ASV <- otu_table(insitu_asv_matrix, taxa_are_rows = TRUE)
TAX <- tax_table(insitu_tax_matrix)
phylo_obj <- phyloseq(ASV, TAX)

# Sample metadata as phyloseq sample data
samplenames <- sample_data(metadata_insitu)

# Combine everything into a single phyloseq object
physeq_insitu <- merge_phyloseq(phylo_obj, samplenames)

## Check
physeq_insitu
# head(insitu_tax_matrix)
# head(metadata_insitu)
# ?divnet()
# DivNet diversity estimation at four taxonomic ranks (run on HPC).
#
# For each rank the phyloseq object is agglomerated ONCE with tax_glom() and
# the result is reused for the four divnet() calls below. Previously
# tax_glom() was recomputed inside every call (16 times in total).
#
# The four designs fitted per rank are:
#   *_divnet            no X supplied
#   *_divnet_label      X = "SAMPLELABEL" (VENT_SITE_SAMPLETYPE_YEAR)
#   *_divnet_TYPE       X = "SAMPLETYPE"  (vent vs plume vs background)
#   *_divnet_TYPE_SITE  X = "TYPE_SITE"   (location and vent vs plume vs background)

# Order level
physeq_order <- tax_glom(physeq_insitu, taxrank = "Order")
order_divnet <- divnet(physeq_order, base = 3)
order_divnet_label <- divnet(physeq_order, X = "SAMPLELABEL", base = 3)
order_divnet_TYPE <- divnet(physeq_order, X = "SAMPLETYPE", base = 3)
order_divnet_TYPE_SITE <- divnet(physeq_order, X = "TYPE_SITE", base = 3)
save(order_divnet, order_divnet_label, order_divnet_TYPE, order_divnet_TYPE_SITE, file = "ORDER.Rdata")

# Family level
physeq_family <- tax_glom(physeq_insitu, taxrank = "Family")
fam_divnet <- divnet(physeq_family, base = 3)
fam_divnet_label <- divnet(physeq_family, X = "SAMPLELABEL", base = 3)
fam_divnet_TYPE <- divnet(physeq_family, X = "SAMPLETYPE", base = 3)
fam_divnet_TYPE_SITE <- divnet(physeq_family, X = "TYPE_SITE", base = 3)
save(fam_divnet, fam_divnet_label, fam_divnet_TYPE, fam_divnet_TYPE_SITE, file = "FAMILY.Rdata")

# Genus level
physeq_genus <- tax_glom(physeq_insitu, taxrank = "Genus")
gen_divnet <- divnet(physeq_genus, base = 3)
gen_divnet_label <- divnet(physeq_genus, X = "SAMPLELABEL", base = 3)
gen_divnet_TYPE <- divnet(physeq_genus, X = "SAMPLETYPE", base = 3)
gen_divnet_TYPE_SITE <- divnet(physeq_genus, X = "TYPE_SITE", base = 3)
save(gen_divnet, gen_divnet_label, gen_divnet_TYPE, gen_divnet_TYPE_SITE, file = "GENUS.Rdata")

# Species level
physeq_species <- tax_glom(physeq_insitu, taxrank = "Species")
spp_divnet <- divnet(physeq_species, base = 3)
spp_divnet_label <- divnet(physeq_species, X = "SAMPLELABEL", base = 3)
spp_divnet_TYPE <- divnet(physeq_species, X = "SAMPLETYPE", base = 3)
spp_divnet_TYPE_SITE <- divnet(physeq_species, X = "TYPE_SITE", base = 3)
save(spp_divnet, spp_divnet_label, spp_divnet_TYPE, spp_divnet_TYPE_SITE, file = "SPECIES.Rdata")
Above run on HPC and RData files save so we can look at various levels of species richness.
Function to extract shannon and simpson data from each divnet output.
# ?pivot_longer()
# Extract Shannon and Simpson estimates from one DivNet result and attach the
# sample metadata.
#
# df:       a divnet() result; summary() of its `shannon` and `simpson`
#           elements must provide a `sample_names` column plus columns
#           starting with "estimate", "error", "lower" and "upper"
#           (one column per prefix is assumed - TODO confirm).
# metadata: per-sample metadata whose row names are the sample names.
#           Defaults to the global `metadata_insitu`, so existing one-argument
#           calls behave as before.
# Returns:  the distinct rows of a table with columns Shannon, Shannon-error,
#           Shannon-lower, Shannon-upper, Simpson, Simpson-error,
#           Simpson-lower, Simpson-upper and the metadata columns.
fxn_extract_divet <- function(df, metadata = metadata_insitu){
  # Each pivot_longer() renames one summary column; the helper "names" columns
  # it creates (ESTIMATE-shannon, ...) are dropped at the end.
  shannon_tbl <- df$shannon %>% summary %>%
    pivot_longer(cols = starts_with("estimate"), names_to = "ESTIMATE-shannon", values_to = "Shannon") %>%
    pivot_longer(cols = starts_with("error"), names_to = "ERROR-shannon", values_to = "Shannon-error") %>%
    pivot_longer(cols = starts_with("lower"), names_to = "LOWER-shannon", values_to = "Shannon-lower") %>%
    pivot_longer(cols = starts_with("upper"), names_to = "UPPER-shannon", values_to = "Shannon-upper")

  simpson_tbl <- df$simpson %>% summary %>%
    pivot_longer(cols = starts_with("estimate"), names_to = "ESTIMATE-simpson", values_to = "Simpson") %>%
    pivot_longer(cols = starts_with("error"), names_to = "ERROR-simpson", values_to = "Simpson-error") %>%
    pivot_longer(cols = starts_with("lower"), names_to = "LOWER-simpson", values_to = "Simpson-lower") %>%
    pivot_longer(cols = starts_with("upper"), names_to = "UPPER-simpson", values_to = "Simpson-upper")

  shannon_tbl %>%
    left_join(simpson_tbl, by = "sample_names") %>%
    # Join on the sample name only; an explicit key avoids an implicit
    # natural join on whatever columns happen to share a name.
    left_join(metadata %>% rownames_to_column(var = "sample_names"),
              by = "sample_names") %>%
    select(-sample_names, -ends_with("-simpson"), -ends_with("-shannon"), -starts_with("model."), -starts_with("name.")) %>%
    distinct()
}
Function to create plots
# Plot Shannon (top panel) and Simpson (bottom panel) estimates by sample type.
#
# df:      a table with SAMPLETYPE, Shannon and Simpson columns, e.g. the
#          output of fxn_extract_divet().
# Returns: the plot_grid() object holding the two stacked panels.
#
# Fixes relative to the previous version:
#   * the bottom panel was labelled "Simpson" but mapped y = Shannon; it now
#     plots the Simpson column;
#   * the top panel drew every observation twice (geom_point + geom_jitter);
#     it now matches the bottom panel and shows jittered points only.
plot_sampletype <- function(df){
  # Layers shared by both panels: violin with quartile lines, jittered points,
  # grey-scale fill per sample type.
  shared_layers <- list(
    geom_violin(aes(fill = SAMPLETYPE), color = "#525252", alpha = 0.5, width = 0.5, draw_quantiles = c(0.25, 0.5, 0.75)),
    geom_jitter(shape = 21, color = "#525252", size = 2, aes(fill = SAMPLETYPE)),
    scale_fill_manual(values = c("#ffffff", "#969696", "#252525")),
    theme_linedraw()
  )

  # Top panel: no x-axis text/ticks and no legend (both are on the bottom panel)
  shannon_panel <- ggplot(df, aes(x = SAMPLETYPE, y = Shannon, group = SAMPLETYPE)) +
    shared_layers +
    theme(axis.text.y = element_text(size = 14),
          axis.text.x = element_blank(),
          strip.background = element_blank(),
          strip.text = element_text(color = "black"),
          legend.position = "none",
          axis.ticks.x = element_blank()) +
    labs(x = "", y = "Shannon")

  # Bottom panel: carries the x-axis labels and the legend
  simpson_panel <- ggplot(df, aes(x = SAMPLETYPE, y = Simpson, group = SAMPLETYPE)) +
    shared_layers +
    theme(axis.text.x = element_text(vjust = 1, hjust = 0.5, size = 14),
          axis.text = element_text(size = 14),
          strip.background = element_blank(),
          strip.text = element_blank(),
          legend.title = element_blank(),
          legend.position = "bottom") +
    labs(x = "", y = "Simpson")

  plot_grid(shannon_panel, simpson_panel,
            ncol = 1, axis = c("lrt"), align = c("vh"))
}
load("data-input/ORDER.Rdata", verbose = T)## Loading objects:
## order_divnet
## order_divnet_label
## order_divnet_TYPE
## order_divnet_TYPE_SITE
# Order level: extract Shannon/Simpson tables for each of the four DivNet
# fits, then plot the no-covariate and SAMPLELABEL fits side by side.
order_alpha_18s <- fxn_extract_divet(order_divnet)
order_alpha_label <- fxn_extract_divet(order_divnet_label)
order_alpha_TYPE <- fxn_extract_divet(order_divnet_TYPE)
order_alpha_TYPE_SITE <- fxn_extract_divet(order_divnet_TYPE_SITE)
plot_grid(plot_sampletype(order_alpha_18s),
plot_sampletype(order_alpha_label),
ncol = 2)load("data-input/FAMILY.Rdata", verbose = T)## Loading objects:
## fam_divnet
## fam_divnet_label
## fam_divnet_TYPE
## fam_divnet_TYPE_SITE
# Family level: same extraction and side-by-side plot as above.
fam_alpha_18s <- fxn_extract_divet(fam_divnet)
fam_alpha_label <- fxn_extract_divet(fam_divnet_label)
fam_alpha_TYPE <- fxn_extract_divet(fam_divnet_TYPE)
fam_alpha_TYPE_SITE <- fxn_extract_divet(fam_divnet_TYPE_SITE)
plot_grid(plot_sampletype(fam_alpha_18s),
plot_sampletype(fam_alpha_label),
ncol = 2)load("data-input/GENUS.Rdata", verbose = T)## Loading objects:
## gen_divnet
## gen_divnet_label
## gen_divnet_TYPE
## gen_divnet_TYPE_SITE
# Genus level: same extraction and side-by-side plot; the SAMPLELABEL fit is
# then plotted again on its own.
gen_alpha_18s <- fxn_extract_divet(gen_divnet)
gen_alpha_label <- fxn_extract_divet(gen_divnet_label)
gen_alpha_TYPE <- fxn_extract_divet(gen_divnet_TYPE)
gen_alpha_TYPE_SITE <- fxn_extract_divet(gen_divnet_TYPE_SITE)
plot_grid(plot_sampletype(gen_alpha_18s),
plot_sampletype(gen_alpha_label),
ncol = 2)plot_sampletype(gen_alpha_label)load("data-input/SPECIES.Rdata", verbose = T)## Loading objects:
## spp_divnet
## spp_divnet_label
## spp_divnet_TYPE
## spp_divnet_TYPE_SITE
# Species level: same extraction and side-by-side plot; the no-covariate fit
# is then plotted on its own, followed by testDiversity() on the TYPE_SITE
# fit for Shannon (its recorded output follows).
spp_alpha_18s <- fxn_extract_divet(spp_divnet)
spp_alpha_label <- fxn_extract_divet(spp_divnet_label)
spp_alpha_TYPE <- fxn_extract_divet(spp_divnet_TYPE)
spp_alpha_TYPE_SITE <- fxn_extract_divet(spp_divnet_TYPE_SITE)
plot_grid(plot_sampletype(spp_alpha_18s),
plot_sampletype(spp_alpha_label),
ncol = 2)plot_sampletype(spp_alpha_18s)testDiversity(spp_divnet_TYPE_SITE, "shannon")## Hypothesis testing:
## p-value for global test: 0
## Estimates Standard Errors p-values
## (Intercept) 2.6026981 0.04265830 0.000
## predictorsAxial_Plume 0.5741397 1.28560932 0.655
## predictorsAxial_Vent 1.1618210 0.08980834 0.000
## predictorsGordaRidge_Background 0.1571173 0.27974388 0.574
## predictorsGordaRidge_Plume 1.0589493 0.27181306 0.000
## predictorsGordaRidge_Vent 0.7711650 0.05643055 0.000
## predictorsPiccard_Background 0.6836154 1.10179528 0.535
## predictorsPiccard_Plume 0.7325748 0.28549380 0.010
## predictorsPiccard_Vent 1.0182122 0.26806101 0.000
## predictorsVonDamm_Background 0.5959967 0.33149807 0.072
## predictorsVonDamm_Plume 1.1301007 0.45120741 0.012
## predictorsVonDamm_Vent -0.4697728 0.15749319 0.003
head(order_alpha_18s)Save for presentation
# svg("Shannon-violin-plot.svg",)
# order_alpha_18s %>%
# # ggplot(aes(x = VENT, y = Shannon)) +
# ggplot(aes(x = SAMPLETYPE, y = Shannon, group = SAMPLETYPE)) +
# # geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
# # geom_point(shape = 21, color = "#525252", size = 2, aes(fill = SAMPLETYPE)) +
# # facet_grid(. ~ SITE + SAMPLETYPE + YEAR, space = "free_x", scales = "free_x") +
# geom_violin(aes(fill = SAMPLETYPE), color = "#525252", alpha = 0.5, width = 0.5, draw_quantiles = c(0.25, 0.5, 0.75)) +
# geom_jitter(shape = 21, color = "#525252", size = 3, aes(fill = SAMPLETYPE)) +
# # scale_fill_brewer(palette = "Set2") +
# scale_fill_manual(values = c("#1c9099", "#fd8d3c", "#f768a1")) +
# theme_linedraw() +
# theme(axis.text.x = element_text(vjust = 1, hjust = 0.5, size = 14),
# axis.text = element_text(size = 14),
# strip.background = element_blank(),
# strip.text = element_blank(),
# legend.title = element_blank(),
# legend.position = "bottom") +
# labs(x = "", y = "Shannon")
# dev.off()# head(order_alpha_TYPE)
# plot_grid(order_alpha_TYPE %>%
# select(-SAMPLELABEL, -VENT, -SITE, -TYPE_SITE, -YEAR) %>%
# distinct() %>%
# ggplot(aes(x = SAMPLETYPE, y = Shannon)) +
# geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
# geom_point(shape = 21, color = "#525252", size = 2, aes(fill = SAMPLETYPE)) +
# # facet_grid(. ~ SITE + SAMPLETYPE + YEAR, space = "free_x", scales = "free_x") +
# scale_fill_brewer(palette = "Set2") +
# theme_linedraw() +
# theme(axis.text.x = element_blank(),
# strip.background = element_blank(),
# strip.text = element_text(color = "black"),
# legend.position = "none",
# axis.ticks = element_blank()) +
# labs(x = "", y = "Shannon"),
# order_alpha_TYPE %>%
# select(-SAMPLELABEL, -VENT, -SITE, -TYPE_SITE, -YEAR) %>%
# distinct() %>%
# ggplot(aes(x = SAMPLETYPE, y = Simpson)) +
# geom_errorbar(aes(ymin = `Simpson-lower`, ymax = `Simpson-upper`), color = "#525252", width = 0.2) +
# geom_point(shape = 21, color = "#525252", size = 2, aes(fill = SAMPLETYPE)) +
# # facet_grid(. ~ SITE + SAMPLETYPE + YEAR, space = "free_x", scales = "free_x") +
# scale_fill_brewer(palette = "Set2") +
# theme_linedraw() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
# strip.background = element_blank(),
# strip.text = element_blank(),
# legend.title = element_blank(),
# legend.position = "bottom") +
# labs(x = "", y = "Simpson"),
# ncol = 1, axis = c("lr"), align = c("v"))# head(order_alpha_TYPE_SITE)
# plot_grid(order_alpha_TYPE_SITE %>%
# select(-SAMPLELABEL, -YEAR, -VENT) %>%
# distinct() %>%
# ggplot(aes(x = SAMPLETYPE, y = Shannon)) +
# geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
# geom_point(shape = 21, color = "#525252", size = 2, aes(fill = SAMPLETYPE)) +
# facet_grid(. ~ SITE, space = "free_x", scales = "free_x") +
# scale_fill_brewer(palette = "Set2") +
# theme_linedraw() +
# theme(axis.text.x = element_blank(),
# strip.background = element_blank(),
# strip.text = element_text(color = "black"),
# legend.position = "none",
# axis.ticks = element_blank()) +
# labs(x = "", y = "Shannon"),
# order_alpha_TYPE_SITE %>%
# select(-SAMPLELABEL, -YEAR, -VENT) %>%
# distinct() %>%
# ggplot(aes(x = SAMPLETYPE, y = Simpson)) +
# geom_errorbar(aes(ymin = `Simpson-lower`, ymax = `Simpson-upper`), color = "#525252", width = 0.2) +
# geom_point(shape = 21, color = "#525252", size = 2, aes(fill = SAMPLETYPE)) +
# facet_grid(. ~ SITE, space = "free_x", scales = "free_x") +
# scale_fill_brewer(palette = "Set2") +
# theme_linedraw() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
# strip.background = element_blank(),
# strip.text = element_blank(),
# legend.title = element_blank(),
# legend.position = "bottom") +
# labs(x = "", y = "Simpson"),
# ncol = 1, axis = c("lr"), align = c("v"))Import data frames below and format as phyloseq. Subsample to select ASVs that appear in more than 1 sample and have a total of 100 or more sequences. This leaves 2577 total ASVs across 43 samples.
Run below commands on HPC
load("asv-tables-processed-18102021.RData", verbose = TRUE)
library(phyloseq)
# library(SpiecEasi)

# Build the phyloseq object used as SPIEC-EASI input.
#
# Select eukaryotes only and create wide format dataframe, keeping ASVs that
# appear in more than 1 sample and have at least 100 sequences in total.
# NOTE(review): the prevalence filter used to be `>= 1`, which is always true
# once SEQ_TOTAL >= 100 holds and so never removed single-sample ASVs. It now
# enforces the documented "more than 1 sample" rule; re-check the ASV count
# quoted in the text against the output of length() below.
insitu_wide_nosingle <- asv_insitu_qc %>%
  filter(Domain == "Eukaryota") %>%
  filter(!grepl("_Plume001_", SAMPLE)) %>% # removing "near vent background", not relevant in other data sets
  select(FeatureID, Taxon, SAMPLE, value) %>%
  pivot_wider(names_from = SAMPLE, values_from = value, values_fill = 0) %>%
  # `.` is the table as piped in, so both row sums run over the sample
  # columns only (PREVALENCE is not included in SEQ_TOTAL)
  mutate(PREVALENCE = rowSums(select_if(., is.numeric) > 0),
         SEQ_TOTAL = rowSums(select_if(., is.numeric))) %>%
  filter(PREVALENCE > 1) %>%
  filter(SEQ_TOTAL >= 100) %>%
  # Drop the helper columns so they are not carried into the ASV matrix as
  # if they were samples
  select(-PREVALENCE, -SEQ_TOTAL)
#
# Sequences had to appear in more than 1 sample and have at least 100 sequences
length(unique(insitu_wide_nosingle$FeatureID))
length(unique(asv_insitu_qc$FeatureID))
# head(insitu_wide)

# Sample names are every column except the two identifier columns
insitu_samples <- as.character(colnames(insitu_wide_nosingle %>% select(-Taxon, -FeatureID)))

# make matrices for phyloseq
insitu_tax_matrix <- insitu_wide_nosingle %>%
  select(FeatureID, Taxon) %>%
  separate(Taxon, c("Domain", "Supergroup",
                    "Phylum", "Class", "Order",
                    "Family", "Genus", "Species"), sep = ";") %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix()
insitu_asv_matrix <- insitu_wide_nosingle %>%
  select(-Taxon) %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix()

# Align row names for each matrix
rownames(insitu_tax_matrix) <- row.names(insitu_asv_matrix)

metadata_insitu <- metadata %>%
  filter(SAMPLE %in% insitu_samples) %>% # from reformatting df above
  select(SAMPLE, VENT, SITE, SAMPLETYPE, YEAR) %>%
  unite(SAMPLELABEL, VENT, SITE, SAMPLETYPE, YEAR, sep = "_", remove = FALSE) %>%
  unite(TYPE_SITE, SITE, SAMPLETYPE, sep = "_", remove = FALSE)
rownames(metadata_insitu) <- metadata_insitu$SAMPLE

# Import asv and tax matrices
ASV <- otu_table(insitu_asv_matrix, taxa_are_rows = TRUE)
TAX <- tax_table(insitu_tax_matrix)
phylo_obj <- phyloseq(ASV, TAX)

# Import metadata as sample data in phyloseq
samplenames <- sample_data(metadata_insitu)

# Join as phyloseq object. This step was missing here, so `physeq_insitu`
# was never rebuilt from the filtered table before being checked and used.
physeq_insitu <- merge_phyloseq(phylo_obj, samplenames)

## Check
physeq_insitu
Run SPIEC-EASI with phyloseq object.
# Fit the SPIEC-EASI network with the glasso method on the phyloseq object.
# `pargs2` holds the settings handed to spiec.easi() through `pulsar.params`.
pargs2 <- list(rep.num = 50, seed = 10010, ncores = 10)
spec_glasso_microeuk <- spiec.easi(
  physeq_insitu,
  method = 'glasso',
  lambda.min.ratio = 1e-2,
  nlambda = 20,
  pulsar.params = pargs2
)
# save(spec_glasso_microeuk, file = "spiec-easi-output-03-12-21.RData")
Isolate ASV-ASV pairs of interest
# Diagnostics on the fitted SPIEC-EASI object; the values recorded from the
# HPC run are kept as comments under each call.
# load("spiec-easi-output-03-12-21.RData", verbose = T) # almost 5GB file!
# Stability of the selected network
getStability(spec_glasso_microeuk) # Target == 0.05
# [1] 0.03827056
# Edge count: sum of the refit adjacency matrix, halved - presumably because
# each undirected edge is stored in both triangles.
# NOTE(review): the recorded result is not a whole number, which a symmetric
# 0/1 matrix with an empty diagonal cannot produce - confirm what is counted.
sum(getRefit(spec_glasso_microeuk))/2
# [1] 45904.5
# spec_glasso_microeuk
# Pulsar-selected refit of sparseiCov
# Path length: 20
# Graph dim: 2577
# Criterion:
# stars... sparsity 0.0138Extract weighted matrix
# Build `df_weighted`: a square data frame of edge weights between the
# columns of the SPIEC-EASI input data.
# se_beta <- as.matrix(symBeta(getOptBeta(spec_glasso_microeuk)))
# df_beta <- as.data.frame(se_beta)
# Extract weight information: rescale the covariance returned by getOptCov()
# to a correlation matrix
glasso_weight <- cov2cor(as.matrix(getOptCov(spec_glasso_microeuk)))
# Copy the row names onto the columns. The data frame built below is renamed
# from the input data in any case.
colnames(glasso_weight) <- rownames(glasso_weight)# <- colnames(Networ_taxa_DF_WideMat) # earlier idea: give the vertices the taxa names by feeding in the vector of names from the levels.
# Element-wise product with the refit adjacency matrix, so only pairs present
# in the refit keep a non-zero weight
weighted_adj_mat <- glasso_weight*getRefit(spec_glasso_microeuk)
df_weighted <- as.data.frame(as.matrix(weighted_adj_mat))
# Assign column and row names - from original glasso output matrix data
colnames(df_weighted) <- colnames(spec_glasso_microeuk$est$data)
row.names(df_weighted) <- colnames(spec_glasso_microeuk$est$data)Work with weighted dataframe
# Lookup table: one row per distinct combination of ASV ID, taxonomy and
# distribution class.
key <- insitu_asv_wClass %>%
  select(FeatureID, Taxon, Domain:Species, CLASS, SITE_CLASS) %>%
  distinct()
# head(key)

# Reshape the square weight table into one row per ordered ASV pair
# (sideA, sideB, value).
edges_long <- df_weighted %>%
  rownames_to_column(var = "sideA") %>%
  pivot_longer(cols = -sideA, names_to = "sideB", values_to = "value")

# Attach the lookup columns twice, once per side of the pair; columns that
# clash on the second join get the _sideA / _sideB suffixes.
df_spieceasi <- edges_long %>%
  left_join(key, by = c("sideA" = "FeatureID")) %>%
  left_join(
    key,
    by = c("sideB" = "FeatureID"),
    suffix = c("_sideA", "_sideB")
  ) %>%
  distinct()
# 6640929 total interactions
## 6 million edged?

# Keep pairs whose absolute weight exceeds 0.01 and label the sign. Zero and
# missing weights never pass the filter, so the two labels cover every row.
df_spieceasi_filtered <- df_spieceasi %>%
  filter(abs(value) > 0.01) %>%
  mutate(Interaction = if_else(value < 0, "negative", "positive"))
# Leaves 91,288 interactions
## Interaction type
# negative positive
# 3363 87925
# Cache the filtered edge list. It is written to data-input/ so that the load()
# below reads the file this save() produces; previously the file was saved to
# the working directory but loaded from data-input/.
save(df_spieceasi_filtered, file = "data-input/filtered-spieceasi-result-08122021.RData")
load("data-input/filtered-spieceasi-result-08122021.RData", verbose = TRUE)
## Loading objects:
## df_spieceasi_filtered
H0: the majority of protist-protist pairs will reveal host-parasite interactions, and then predator-prey.
Questions to ask regarding the SPIEC-EASI results:
- What is the overall taxonomic composition of negative and positive co-occurring ASVs?
- What percentage of putative interactions include likely parasitic protists?
Format the results and get summary statistics from the network analysis.
head(df_spieceasi_filtered)## # A tibble: 6 × 26
## sideA sideB value Taxon_sideA Domain_sideA Supergroup_sideA Phylum_sideA
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 000ee3… 04132… 0.449 Eukaryota;Alv… Eukaryota Alveolata Ciliophora
## 2 000ee3… 07696… 0.395 Eukaryota;Alv… Eukaryota Alveolata Ciliophora
## 3 000ee3… 15d82… 0.449 Eukaryota;Alv… Eukaryota Alveolata Ciliophora
## 4 000ee3… 1849e… 0.432 Eukaryota;Alv… Eukaryota Alveolata Ciliophora
## 5 000ee3… 1a9bf… 0.443 Eukaryota;Alv… Eukaryota Alveolata Ciliophora
## 6 000ee3… 1c511… 0.416 Eukaryota;Alv… Eukaryota Alveolata Ciliophora
## # … with 19 more variables: Class_sideA <chr>, Order_sideA <chr>,
## # Family_sideA <chr>, Genus_sideA <chr>, Species_sideA <chr>,
## # CLASS_sideA <chr>, SITE_CLASS_sideA <chr>, Taxon_sideB <chr>,
## # Domain_sideB <chr>, Supergroup_sideB <chr>, Phylum_sideB <chr>,
## # Class_sideB <chr>, Order_sideB <chr>, Family_sideB <chr>,
## # Genus_sideB <chr>, Species_sideB <chr>, CLASS_sideB <chr>,
## # SITE_CLASS_sideB <chr>, Interaction <chr>
# Collapse mirrored edges (A-B and B-A) into a single row per ASV pair.
# The two IDs of each pair are put in a canonical order with vectorised
# pmin()/pmax() instead of a row-wise sort() plus a paste/separate round-trip.
# A mirrored row is dropped only when its weight and sign are identical too.
pair_edges <- df_spieceasi_filtered %>%
  transmute(
    pair_first = pmin(sideA, sideB),
    pair_second = pmax(sideA, sideB),
    value,
    Interaction
  ) %>%
  distinct() %>%
  rename(sideA = pair_first, sideB = pair_second)

# Per-side annotation tables: the ID column plus every *_sideA (or *_sideB)
# column of the filtered edge list.
side_a_info <- df_spieceasi_filtered %>%
  select(ends_with("sideA")) %>%
  distinct()
side_b_info <- df_spieceasi_filtered %>%
  select(ends_with("sideB")) %>%
  distinct()

# Re-attach the annotations; the join keys are stated explicitly rather than
# inferred from shared column names.
spieceasi_rm_reps <- pair_edges %>%
  left_join(side_a_info, by = "sideA") %>%
  left_join(side_b_info, by = "sideB")
head(spieceasi_rm_reps)
## # A tibble: 6 × 26
## sideA sideB value Interaction Taxon_sideA Domain_sideA Supergroup_sideA
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 000ee3… 041329… 0.449 positive Eukaryota;Alv… Eukaryota Alveolata
## 2 000ee3… 076962… 0.395 positive Eukaryota;Alv… Eukaryota Alveolata
## 3 000ee3… 15d825… 0.449 positive Eukaryota;Alv… Eukaryota Alveolata
## 4 000ee3… 1849ed… 0.432 positive Eukaryota;Alv… Eukaryota Alveolata
## 5 000ee3… 1a9bf2… 0.443 positive Eukaryota;Alv… Eukaryota Alveolata
## 6 000ee3… 1c511b… 0.416 positive Eukaryota;Alv… Eukaryota Alveolata
## # … with 19 more variables: Phylum_sideA <chr>, Class_sideA <chr>,
## # Order_sideA <chr>, Family_sideA <chr>, Genus_sideA <chr>,
## # Species_sideA <chr>, CLASS_sideA <chr>, SITE_CLASS_sideA <chr>,
## # Taxon_sideB <chr>, Domain_sideB <chr>, Supergroup_sideB <chr>,
## # Phylum_sideB <chr>, Class_sideB <chr>, Order_sideB <chr>,
## # Family_sideB <chr>, Genus_sideB <chr>, Species_sideB <chr>,
## # CLASS_sideB <chr>, SITE_CLASS_sideB <chr>
Look at highest percentage of ASV-ASV pairs by various categories.
# Get stats
# Share of all de-duplicated ASV-ASV pairs held by each combination of
# distribution-class pair and interaction sign, largest first.
totaloccur <- nrow(spieceasi_rm_reps)
spieceasi_rm_reps %>%
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(
    PHYLUM_joined, Phylum_sideA, Phylum_sideB,
    remove = FALSE, sep = "-"
  ) %>%
  group_by(CLASS_joined, Interaction) %>%
  summarise(
    COUNT = n(),
    PERC = (COUNT / totaloccur) * 100
  ) %>%
  arrange(desc(PERC))
## # A tibble: 63 × 4
## # Groups: CLASS_joined [43]
## CLASS_joined Interaction COUNT PERC
## <chr> <chr> <int> <dbl>
## 1 Vent only-Vent only positive 16727 36.6
## 2 Vent, plume, & background-Vent, plume, & background positive 7076 15.5
## 3 Vent only-Vent & background positive 1518 3.32
## 4 Vent & background-Vent only positive 1435 3.14
## 5 Vent only-Vent, plume, & background positive 1319 2.89
## 6 Vent, plume, & background-Vent only positive 1180 2.58
## 7 Plume only-Plume only positive 1099 2.41
## 8 Vent & plume-Vent only positive 1090 2.39
## 9 Vent only-Vent & plume positive 932 2.04
## 10 Vent & plume-Vent, plume, & background positive 897 1.96
## # … with 53 more rows
ASV-ASV pairs were primarily (36.6%) between ASVs classified as ‘Vent only’, i.e., both ASVs in the pair were resident.
Secondly, 15% of the ASV pairs were from ASVs specifically found in all sample types.
Again, almost all pairs were positive. The highest occurrence of negative interactions was between ‘Vent only’ and ‘Vent, plume, & background’ ASVs.
# The two most common distribution-class pairs (see the table above).
asv_asv <- c("Vent only-Vent only", "Vent, plume, & background-Vent, plume, & background")
# Get stats
totaloccur <- dim(spieceasi_rm_reps)[1]
# Within each of the two class pairs: percentage of its ASV-ASV pairs held by
# each phylum pair and interaction sign, spread into one column per class pair.
# add_count()/count() give exactly one row per group. The previous summarise()
# mixed n() with a full-length column, so it returned one row per input row and
# needed distinct() to clean up (multi-row summaries are deprecated in
# dplyr >= 1.1).
spieceasi_rm_reps %>%
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv) %>%
  add_count(CLASS_joined, name = "TOTALCOUNT") %>%
  count(CLASS_joined, PHYLUM_joined, Interaction, TOTALCOUNT, name = "COUNT") %>%
  mutate(PERC = 100 * (COUNT / TOTALCOUNT)) %>%
  arrange(desc(PERC)) %>%
  select(CLASS_joined, PHYLUM_joined, PERC, Interaction) %>%
  pivot_wider(names_from = CLASS_joined, values_from = PERC)
## # A tibble: 721 × 4
## PHYLUM_joined Interaction `Vent only-Vent … `Vent, plume, & …
## <chr> <chr> <dbl> <dbl>
## 1 Ciliophora-Ciliophora positive 15.0 2.80
## 2 Dinoflagellata-Dinoflagellata positive 2.95 12.6
## 3 Dinoflagellata-Radiolaria positive 0.837 5.56
## 4 Radiolaria-Dinoflagellata positive 0.705 5.41
## 5 Radiolaria-Radiolaria positive 0.209 3.74
## 6 Dinoflagellata-Ciliophora positive 3.34 3.39
## 7 Ciliophora-Dinoflagellata positive 2.86 2.87
## 8 Cercozoa-Ciliophora positive 2.43 0.0519
## 9 Ciliophora-Cercozoa positive 2.20 0.182
## 10 Haptophyta-Haptophyta positive 1.97 0.441
## # … with 711 more rows
Within ‘Vent only-Vent only’ ASV pairs, almost 15% were among ciliates (ciliate-ciliate), while for the ‘Vent, plume, & background’ ASV pairs, almost 13% were between dinoflagellates.
Isolate the ASV-ASV pairs that appeared most frequently. These include ciliates, dinoflagellates, and radiolaria within the resident and cosmopolitan classes (the latter includes ASVs that appear at least once in all sample types).
# Phylum pairs that make up the radiolaria / dinoflagellate subset. Kept under
# its own name: previously this vector was called `rad_dino` and was
# overwritten by the data frame that it was being used to filter.
rad_dino_pairs <- c("Dinoflagellata-Radiolaria", "Radiolaria-Dinoflagellata", "Radiolaria-Radiolaria")

# Build the joined class / site-class / phylum labels once and keep only the
# class pairs listed in `asv_asv`; the three subsets below share this table.
pairs_of_interest <- spieceasi_rm_reps %>%
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv)

ciliate_ciliate <- pairs_of_interest %>%
  filter(PHYLUM_joined == "Ciliophora-Ciliophora")
dino_dino <- pairs_of_interest %>%
  filter(PHYLUM_joined == "Dinoflagellata-Dinoflagellata")
rad_dino <- pairs_of_interest %>%
  filter(PHYLUM_joined %in% rad_dino_pairs)

Importing data from Ramond et al. 2019 - https://www.seanoe.org/data/00405/51662/
# Functional-trait table (semicolon-delimited).
fxn <- read.delim(file = "data-input/fxntraits-ramond.csv", sep = ";")
library(fuzzyjoin)
# Turn the "|"-delimited lineage into a ";"-delimited Taxon string, keep the
# trait columns of interest, and split Taxon into nine rank columns.
# mutate() replaces the deprecated mutate_at(vars(), funs()) form.
# `extra = "drop"` and `fill = "right"` spell out what separate() was already
# doing by default with a warning: ranks beyond the ninth are discarded and
# short lineages are padded with NA on the right.
fxn_formatted <- fxn %>%
  mutate(Lineage = str_replace_all(Lineage, pattern = "\\|", replacement = ";")) %>%
  select(
    Taxon = Lineage, SizeMin, SizeMax, Cover, Shape, Spicule, Symmetry,
    Polarity, Colony, Motility, Chloroplast, Plast_Origin, Ingestion,
    Behaviour, Mutualistic_Host, starts_with("Symbion"),
    ends_with("_Specialisation"), Mucilage, Chemical_Signal,
    Nutrient_Afinity, Oxygen_Tolerance, Salinity
  ) %>%
  separate(
    Taxon,
    c(
      "Domain", "Supergroup_0", "Supergroup",
      "Phylum", "Class", "Order",
      "Family", "Genus", "Species"
    ),
    sep = ";", remove = FALSE, extra = "drop", fill = "right"
  )
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Warning: Expected 9 pieces. Additional pieces discarded in 457 rows [173, 174,
## 175, 178, 179, 192, 194, 195, 196, 198, 200, 201, 202, 203, 204, 206, 207, 208,
## 210, 211, ...].
## Warning: Expected 9 pieces. Missing pieces filled with `NA` in 980 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
# unique(fxn_formatted$Supergroup)

Create manageable databases to text-search for functional traits among the ASV-ASV pairs of interest.
# Collapse the trait table to one row per genus within a single phylum.
#
# @param traits Trait table with the rank columns Supergroup..Species and the
#   trait columns listed below (as in `fxn_formatted`).
# @param target_phylum Value of `Phylum` to keep.
# @return One row per Supergroup/Phylum/Class/Order/Family/Genus. Each listed
#   column is pasted into a single ", "-separated string named `<column>_col`.
#   As before, the result stays grouped by every rank except Genus.
collapse_traits_by_genus <- function(traits, target_phylum) {
  traits %>%
    filter(Phylum == .env$target_phylum) %>%
    group_by(Supergroup, Phylum, Class, Order, Family, Genus) %>%
    summarise(
      across(
        c(
          Species, Cover, Shape, Motility, Ingestion, Behaviour,
          Mutualistic_Host, Symbiontic, Host_Specialisation,
          Prey_Specialisation, Oxygen_Tolerance
        ),
        ~ str_c(.x, collapse = ", "),
        .names = "{.col}_col"
      ),
      .groups = "drop_last"
    )
}

# The phylum names are the ones used in the trait table's lineage strings.
ciliate_unique_meta <- collapse_traits_by_genus(fxn_formatted, "Ciliophora")
dino_unique_meta <- collapse_traits_by_genus(fxn_formatted, "Myzozoa")
ret_unique_meta <- collapse_traits_by_genus(fxn_formatted, "Retaria")
# dim(ret_unique_meta)
# dim(dino_unique_meta)
# dim(ciliate_unique_meta)
# View(ret_unique_meta)
# View(dino_unique_meta)
# View(ciliate_unique_meta)

Bar plot of total interactions; highlight any category whose share is over 1% - what is the category?
Distribution of Spiec Easi output.
# head(spieceasi_rm_reps)
# Distribution of the retained edge weights.
hist(spieceasi_rm_reps$value)
# dim(spieceasi_rm_reps)
total_num <- dim(spieceasi_rm_reps)[1]
asv_asv <- c("Vent only-Vent only", "Vent, plume, & background-Vent, plume, & background")
# total_num

# Layers shared by all four stacked bars: horizontal orientation, manual fill
# colours, and a void theme with only the value axis drawn.
stacked_bar_layers <- function(fill_values) {
  list(
    coord_flip(),
    scale_fill_manual(values = fill_values),
    theme_void(),
    theme(
      axis.text.x = element_text(color = "black", size = 12),
      axis.line.x = element_line(color = "black"),
      axis.ticks.x = element_line(color = "black"),
      legend.title = element_blank()
    )
  )
}

# Panel 1: share of all pairs per distribution-class pair; class pairs under
# 10% are lumped together.
class_bar <- spieceasi_rm_reps %>%
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  count(CLASS_joined, name = "COUNT") %>%
  mutate(
    PERC = 100 * (COUNT / total_num),
    CLASSIFICATION = if_else(PERC >= 10, CLASS_joined, "Less than 10%")
  ) %>%
  count(CLASSIFICATION, wt = PERC, name = "PERC_SUM") %>%
  ggplot(aes(x = 1, y = PERC_SUM, fill = CLASSIFICATION)) +
  geom_bar(stat = "identity", width = 0.2, color = "white") +
  stacked_bar_layers(c("#FFFA78", "#F58F52", "#EB6525"))

# Panel 2: share of all pairs per interaction sign.
interaction_bar <- spieceasi_rm_reps %>%
  count(Interaction, name = "COUNT") %>%
  mutate(PERC_SUM = 100 * (COUNT / total_num)) %>%
  ggplot(aes(x = 1, y = PERC_SUM, fill = Interaction)) +
  geom_bar(stat = "identity", width = 0.2, color = "white") +
  stacked_bar_layers(c("#5B5B8A", "#C67B33"))

# Panel 3: share of all pairs per phylum pair; phylum pairs under 2% are
# lumped together.
phylum_bar <- spieceasi_rm_reps %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  count(PHYLUM_joined, name = "COUNT") %>%
  mutate(
    PERC = 100 * (COUNT / total_num),
    CLASSIFICATION = if_else(PERC >= 2, PHYLUM_joined, "Less than 2%")
  ) %>%
  count(CLASSIFICATION, wt = PERC, name = "PERC_SUM") %>%
  ggplot(aes(x = 1, y = PERC_SUM, fill = CLASSIFICATION)) +
  geom_bar(stat = "identity", width = 0.2, color = "white") +
  stacked_bar_layers(c(
    "#6FA9E4", "#A4723F", "#577383", "#F8F5E5", "#D1AB8A",
    "#FBDB9A", "#5C5E5E", "#A12531", "#59372B"
  ))

# Panel 4: phylum-pair composition within each of the two dominant class pairs.
# PERC is computed once per (class pair, phylum pair). The earlier summarise()
# used a full-length column, so it returned one row per ASV pair and the
# following sum(PERC) weighted every phylum pair by its own count. The lumped
# label now states the 10% cutoff that is actually applied.
phylum_within_class_bar <- spieceasi_rm_reps %>%
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv) %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  count(CLASS_joined, PHYLUM_joined, name = "COUNT") %>%
  group_by(CLASS_joined) %>%
  mutate(PERC = 100 * (COUNT / sum(COUNT))) %>%
  ungroup() %>%
  mutate(CLASSIFICATION = if_else(PERC >= 10, PHYLUM_joined, "Less than 10%")) %>%
  count(CLASS_joined, CLASSIFICATION, wt = PERC, name = "PERC_SUM") %>%
  ggplot(aes(x = CLASS_joined, y = PERC_SUM, fill = CLASSIFICATION)) +
  geom_bar(stat = "identity", position = "fill", width = 0.6, color = "white") +
  stacked_bar_layers(c("#B24236", "#fed976", "#5A7356"))

plot_grid(
  class_bar,
  interaction_bar,
  phylum_bar,
  phylum_within_class_bar,
  ncol = 1, align = c("vh"), axis = c("lr")
)
# ?plot_grid()

Because dinos, ciliates, and radiolaria were the most common. How many ASV-ASV pairs were they a part of?
# Count, for one focal phylum, how many ASV-ASV pairs it shares with each
# partner phylum.
#
# @param pairs De-duplicated pair table with Phylum_sideA and Phylum_sideB.
# @param primary Focal phylum name.
# @return One row per partner phylum: CONNECT (partner), COUNT (number of
#   pairs) and PRIMARY (the focal phylum).
#
# A pair with the focal phylum on both sides is counted with the focal phylum
# as its own partner; previously both case_when() branches were FALSE for such
# pairs, so they were counted under NA. CONNECT is still NA when the partner's
# phylum is unknown.
count_partner_phyla <- function(pairs, primary) {
  pairs %>%
    unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
    filter(grepl(primary, PHYLUM_joined, fixed = TRUE)) %>%
    mutate(
      CONNECT = if_else(Phylum_sideA %in% primary, Phylum_sideB, Phylum_sideA)
    ) %>%
    count(CONNECT, name = "COUNT") %>%
    mutate(PRIMARY = primary)
}

# A pair between two of the focal phyla appears under both of them.
dino_cili_rad <- bind_rows(
  lapply(
    c("Ciliophora", "Dinoflagellata", "Radiolaria"),
    count_partner_phyla,
    pairs = spieceasi_rm_reps
  )
)
# Quick checks: the focal phyla present and the number of distinct partners.
unique(dino_cili_rad$PRIMARY)
## [1] "Ciliophora" "Dinoflagellata" "Radiolaria"
length(unique(dino_cili_rad$CONNECT))
## [1] 32
# dim(dino_cili_rad)

# Bubble chart: one open circle per (focal phylum, partner phylum), sized by
# the number of ASV-ASV pairs. coord_flip() puts the focal phyla on the
# vertical axis.
bubble_theme <- theme(
  axis.title = element_blank(),
  axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
  # panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.ticks = element_blank()
)
ggplot(dino_cili_rad, aes(x = PRIMARY, y = CONNECT)) +
  geom_point(
    aes(size = COUNT),
    color = "black", fill = "white", shape = 21, stroke = 1
  ) +
  coord_flip() +
  theme_linedraw() +
  bubble_theme
# head(ciliate_ciliate)
# table(ciliate_ciliate$SITE_CLASS_joined)
# table(ciliate_ciliate)
# Tile heat map of edge weights for the 'Vent only-Vent only' pairs in `pairs`.
#
# @param pairs Pair table with CLASS_joined, Taxon_sideA/B, Class_sideA/B,
#   Order_sideA/B and value (e.g. ciliate_ciliate, dino_dino, rad_dino).
# @return A ggplot object, faceted by Class + Order of each side.
# NOTE(review): tiles are positioned by the Taxon strings, so ASV pairs that
# share both Taxon labels are drawn on the same tile - confirm this is intended.
plot_pair_heatmap <- function(pairs) {
  pairs %>%
    filter(CLASS_joined == "Vent only-Vent only") %>%
    # filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
    ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
    geom_tile(color = "black") +
    scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
    theme_linedraw() +
    facet_grid(
      Class_sideB + Order_sideB ~ Class_sideA + Order_sideA,
      space = "free", scales = "free"
    ) +
    theme(
      axis.text = element_blank(),
      panel.grid.minor = element_blank(),
      strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
      strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
      strip.text = element_text(color = "black", face = "bold"),
      strip.background = element_blank()
    )
}

plot_pair_heatmap(ciliate_ciliate)

Isolate functional traits from these groups of ciliates

# NOTE(review): `order` reuses the name of base::order(), and the vector names
# may not match the taxonomic rank of the entries they hold - confirm before
# using these to filter.
order <- c("Plagiopylea", "Nassophorea", "Litostomatea")
family <- c("Scuticociliatia", "Euplotia", "Suctoria", "Peritrichia")
genus <- c("Strombidiidae")

plot_pair_heatmap(dino_dino)

Dinos and others Get stats on Ciliate-other ASV pairs for a table

plot_pair_heatmap(rad_dino)

Retaria and others Get stats on Ciliate-other ASV pairs for a table
# unique(df_spieceasi_filtered$Class_sideA)
# rm <- c(NA, "Metazoa", "")
# Filter to remove ASVs not classified to the Phylum level at least
# compare_inter <- df_spieceasi_filtered %>%
# filter(!(Phylum_sideA %in% rm | Phylum_sideB %in% rm)) %>%
# select()
#
# compare_inter <- select(df_spieceasi_filtered, FeatureID = sideA, Taxon = Taxon_sideA) %>%
# rbind(select(df_spieceasi_filtered, FeatureID = sideB, Taxon = Taxon_sideB)) %>%
# distinct()
#
# compare_fxn <- fxn_formatted
# write_delim(compare_inter, file = "interactions_unique_IDs.txt", delim = "\t")
# write_delim(fxn_formatted, file = "ramond_IDs.txt", delim = "\t")

# Record the R version and package versions used for this report.
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS/LAPACK: /Users/sarahhu/anaconda3/envs/r_4.1/lib/libopenblasp-r0.3.15.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] fuzzyjoin_0.1.6 DivNet_0.3.7 breakaway_4.7.3 geosphere_1.5-14
## [5] ggdendro_0.1.22 treemapify_2.5.5 vegan_2.5-7 lattice_0.20-44
## [9] permute_0.9-5 viridis_0.6.1 viridisLite_0.4.0 plotly_4.10.0
## [13] gt_0.3.1 ggupset_0.3.0 patchwork_1.1.1 compositions_2.0-2
## [17] decontam_1.12.0 phyloseq_1.36.0 cowplot_1.1.1 forcats_0.5.1
## [21] stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_2.0.0
## [25] tidyr_1.1.3 tibble_3.1.3 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6
## [4] igraph_1.2.9 lazyeval_0.2.2 sp_1.4-5
## [7] splines_4.1.0 crosstalk_1.2.0 GenomeInfoDb_1.28.1
## [10] digest_0.6.27 foreach_1.5.1 htmltools_0.5.2
## [13] fansi_0.5.0 magrittr_2.0.1 checkmate_2.0.0
## [16] cluster_2.1.2 doParallel_1.0.16 tzdb_0.1.2
## [19] Biostrings_2.60.1 ggfittext_0.9.1 modelr_0.1.8
## [22] bayesm_3.1-4 vroom_1.5.4 colorspace_2.0-2
## [25] rvest_1.0.1 haven_2.4.3 xfun_0.24
## [28] crayon_1.4.1 RCurl_1.98-1.3 jsonlite_1.7.2
## [31] lme4_1.1-27.1 survival_3.2-11 iterators_1.0.13
## [34] ape_5.5 glue_1.4.2 gtable_0.3.0
## [37] zlibbioc_1.38.0 XVector_0.32.0 Rhdf5lib_1.14.2
## [40] BiocGenerics_0.38.0 DEoptimR_1.0-9 abind_1.4-5
## [43] scales_1.1.1 DBI_1.1.1 Rcpp_1.0.7
## [46] bit_4.0.4 stats4_4.1.0 htmlwidgets_1.5.3
## [49] httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2
## [52] pkgconfig_2.0.3 farver_2.1.0 sass_0.4.0
## [55] dbplyr_2.1.1 utf8_1.2.2 tidyselect_1.1.1
## [58] labeling_0.4.2 rlang_0.4.11 reshape2_1.4.4
## [61] munsell_0.5.0 cellranger_1.1.0 tools_4.1.0
## [64] cli_3.0.1 generics_0.1.0 ade4_1.7-17
## [67] broom_0.7.9 evaluate_0.14 biomformat_1.20.0
## [70] fastmap_1.1.0 yaml_2.2.1 knitr_1.33
## [73] bit64_4.0.5 fs_1.5.0 robustbase_0.93-8
## [76] nlme_3.1-152 mvnfast_0.2.7 xml2_1.3.2
## [79] compiler_4.1.0 rstudioapi_0.13 reprex_2.0.1
## [82] bslib_0.3.0 stringi_1.7.4 highr_0.9
## [85] Matrix_1.3-4 nloptr_1.2.2.2 tensorA_0.36.2
## [88] multtest_2.48.0 vctrs_0.3.8 pillar_1.6.2
## [91] lifecycle_1.0.0 rhdf5filters_1.4.0 jquerylib_0.1.4
## [94] data.table_1.14.0 bitops_1.0-7 R6_2.5.0
## [97] gridExtra_2.3 IRanges_2.26.0 codetools_0.2-18
## [100] boot_1.3-28 MASS_7.3-54 assertthat_0.2.1
## [103] rhdf5_2.36.0 withr_2.4.2 S4Vectors_0.30.0
## [106] GenomeInfoDbData_1.2.6 mgcv_1.8-36 parallel_4.1.0
## [109] hms_1.1.0 grid_4.1.0 minqa_1.2.4
## [112] rmarkdown_2.9 Biobase_2.52.0 lubridate_1.7.10